239 files changed, 16327 insertions, 10945 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5abf50e5bd10..ffcb9a09ad73 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,16 +15,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "AggressiveAntiDepBreaker.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "post-RA-sched"
@@ -34,18 +51,17 @@ static cl::opt<int>
 DebugDiv("agg-antidep-debugdiv",
          cl::desc("Debug control for aggressive anti-dep breaker"),
          cl::init(0), cl::Hidden);
+
 static cl::opt<int>
 DebugMod("agg-antidep-debugmod",
          cl::desc("Debug control for aggressive anti-dep breaker"),
          cl::init(0), cl::Hidden);
 
 AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
-                                               MachineBasicBlock *BB) :
-  NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
-  GroupNodeIndices(TargetRegs, 0),
-  KillIndices(TargetRegs, 0),
-  DefIndices(TargetRegs, 0)
-{
+                                               MachineBasicBlock *BB)
+    : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+      GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0),
+      DefIndices(TargetRegs, 0) {
   const unsigned BBSize = BB->size();
   for (unsigned i = 0; i < NumTargetRegs; ++i) {
     // Initialize all registers to be in their own group. Initially we
@@ -76,8 +92,7 @@ void AggressiveAntiDepState::GetGroupRegs(
   }
 }
 
-unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
-{
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) {
   assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
   assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
 
@@ -92,8 +107,7 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
   return Parent;
 }
 
-unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
-{
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) {
   // Create a new GroupNode for Reg. Reg's existing GroupNode must
   // stay as is because there could be other GroupNodes referring to
   // it.
@@ -103,8 +117,7 @@ unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
   return idx;
 }
 
-bool AggressiveAntiDepState::IsLive(unsigned Reg)
-{
+bool AggressiveAntiDepState::IsLive(unsigned Reg) {
   // KillIndex must be defined and DefIndex not defined for a register
   // to be live.
   return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
@@ -115,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
     TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
     : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
       TII(MF.getSubtarget().getInstrInfo()),
-      TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
-      State(nullptr) {
+      TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) {
   /* Collect a bitset of all registers that are only broken if they
      are on the critical path. */
   for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -129,7 +141,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
 
   DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
   DEBUG(for (unsigned r : CriticalPathSet.set_bits())
-          dbgs() << " " << TRI->getName(r));
+          dbgs() << " " << printReg(r, TRI));
   DEBUG(dbgs() << '\n');
 }
 
@@ -204,7 +216,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
     // schedule region).
     if (State->IsLive(Reg)) {
       DEBUG(if (State->GetGroup(Reg) != 0)
-              dbgs() << " " << TRI->getName(Reg) << "=g" <<
+              dbgs() << " " << printReg(Reg, TRI) << "=g" <<
                 State->GetGroup(Reg) << "->g0(region live-out)");
       State->UnionGroups(Reg, 0);
     } else if ((DefIndices[Reg] < InsertPosIndex)
@@ -250,7 +262,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
 
 /// AntiDepEdges - Return in Edges the anti- and output- dependencies
 /// in SU that we want to consider for breaking.
-static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep *> &Edges) {
   SmallSet<unsigned, 4> RegSet;
   for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
        P != PE; ++P) {
@@ -311,7 +323,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
     RegRefs.erase(Reg);
     State->LeaveGroup(Reg);
     DEBUG(if (header) {
-        dbgs() << header << TRI->getName(Reg); header = nullptr; });
+        dbgs() << header << printReg(Reg, TRI); header = nullptr; });
     DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
     // Repeat for subregisters. Note that we only do this if the superregister
     // was not live because otherwise, regardless whether we have an explicit
@@ -325,8 +337,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
         RegRefs.erase(SubregReg);
         State->LeaveGroup(SubregReg);
         DEBUG(if (header) {
-            dbgs() << header << TRI->getName(Reg); header = nullptr; });
-        DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+            dbgs() << header << printReg(Reg, TRI); header = nullptr; });
+        DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" <<
               State->GetGroup(SubregReg) << tag);
       }
     }
@@ -362,7 +374,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
 
-    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+    DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg));
 
     // If MI's defs have a special allocation requirement, don't allow
     // any def registers to be changed. Also assume all registers
@@ -381,8 +393,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
       unsigned AliasReg = *AI;
       if (State->IsLive(AliasReg)) {
         State->UnionGroups(Reg, AliasReg);
-        DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
-              TRI->getName(AliasReg) << ")");
+        DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via "
+                     << printReg(AliasReg, TRI) << ")");
       }
     }
 
@@ -436,11 +448,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
   // FIXME: The issue with predicated instruction is more complex. We are being
   // conservatively here because the kill markers cannot be trusted after
   // if-conversion:
-  // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
   // ...
-  // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
-  // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
-  // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
+  // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
+  // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
   //
   // The first R6 kill is not really a kill since it's killed by a predicated
   // instruction which may not be executed. The second R6 def may or may not
@@ -457,8 +469,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
 
-    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
-          State->GetGroup(Reg));
+    DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg));
 
     // It wasn't previously live but now it is, this is a kill. Forget
     // the previous live-range information and start a new live-range
@@ -493,10 +504,10 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
       if (Reg == 0) continue;
 
       if (FirstReg != 0) {
-        DEBUG(dbgs() << "=" << TRI->getName(Reg));
+        DEBUG(dbgs() << "=" << printReg(Reg, TRI));
         State->UnionGroups(FirstReg, Reg);
       } else {
-        DEBUG(dbgs() << " " << TRI->getName(Reg));
+        DEBUG(dbgs() << " " << printReg(Reg, TRI));
         FirstReg = Reg;
       }
     }
@@ -544,8 +555,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   // break the anti-dependence.
   std::vector<unsigned> Regs;
   State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
-  assert(Regs.size() > 0 && "Empty register group!");
-  if (Regs.size() == 0)
+  assert(!Regs.empty() && "Empty register group!");
+  if (Regs.empty())
     return false;
 
   // Find the "superest" register in the group. At the same time,
@@ -562,7 +573,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
 
     // If Reg has any references, then collect possible rename regs
     if (RegRefs.count(Reg) > 0) {
-      DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+      DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");
 
       BitVector &BV = RenameRegisterMap[Reg];
       assert(BV.empty());
@@ -571,7 +582,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
       DEBUG({
         dbgs() << " ::";
         for (unsigned r : BV.set_bits())
-          dbgs() << " " << TRI->getName(r);
+          dbgs() << " " << printReg(r, TRI);
         dbgs() << "\n";
       });
     }
@@ -596,8 +607,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
     if (renamecnt++ % DebugDiv != DebugMod)
       return false;
 
-    dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
-      " for debug ***\n";
+    dbgs() << "*** Performing rename " << printReg(SuperReg, TRI)
+           << " for debug ***\n";
   }
 #endif
 
@@ -634,7 +645,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
     // Don't replace a register with itself.
     if (NewSuperReg == SuperReg) continue;
 
-    DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+    DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':');
     RenameMap.clear();
 
     // For each referenced group register (which must be a SuperReg or
@@ -651,7 +662,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
           NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
       }
 
-      DEBUG(dbgs() << " " << TRI->getName(NewReg));
+      DEBUG(dbgs() << " " << printReg(NewReg, TRI));
 
       // Check if Reg can be renamed to NewReg.
       if (!RenameRegisterMap[Reg].test(NewReg)) {
@@ -672,7 +683,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
           unsigned AliasReg = *AI;
           if (State->IsLive(AliasReg) ||
               (KillIndices[Reg] > DefIndices[AliasReg])) {
-            DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+            DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)");
             found = true;
             break;
           }
@@ -732,14 +743,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
 
 /// BreakAntiDependencies - Identifiy anti-dependencies within the
 /// ScheduleDAG and break them by renaming registers.
-///
 unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
-                              const std::vector<SUnit>& SUnits,
+                              const std::vector<SUnit> &SUnits,
                               MachineBasicBlock::iterator Begin,
                               MachineBasicBlock::iterator End,
                               unsigned InsertPosIndex,
                               DbgValueVector &DbgValues) {
-
   std::vector<unsigned> &KillIndices = State->GetKillIndices();
   std::vector<unsigned> &DefIndices = State->GetDefIndices();
   std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
@@ -783,7 +792,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
   DEBUG(dbgs() << "Available regs:");
   for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
     if (!State->IsLive(Reg))
-      DEBUG(dbgs() << " " << TRI->getName(Reg));
+      DEBUG(dbgs() << " " << printReg(Reg, TRI));
   }
   DEBUG(dbgs() << '\n');
 #endif
@@ -839,7 +848,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
             (Edge->getKind() != SDep::Output)) continue;
 
         unsigned AntiDepReg = Edge->getReg();
-        DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+        DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI));
         assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
 
         if (!MRI.isAllocatable(AntiDepReg)) {
@@ -942,7 +951,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
         std::map<unsigned, unsigned> RenameMap;
         if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
           DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
-                << TRI->getName(AntiDepReg) << ":");
+                       << printReg(AntiDepReg, TRI) << ":");
 
           // Handle each group register...
           for (std::map<unsigned, unsigned>::iterator
@@ -950,9 +959,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
             unsigned CurrReg = S->first;
             unsigned NewReg = S->second;
 
-            DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
-                  TRI->getName(NewReg) << "(" <<
-                  RegRefs.count(CurrReg) << " refs)");
+            DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->"
+                         << printReg(NewReg, TRI) << "("
+                         << RegRefs.count(CurrReg) << " refs)");
 
             // Update the references to the old register CurrReg to
             // refer to the new register NewReg.
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index f97e6666b219..5dce3c2499e5 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,29 +19,35 @@
 
 #include "AntiDepBreaker.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Compiler.h"
 #include <map>
+#include <set>
+#include <vector>
 
 namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
 class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterClass;
+class TargetRegisterInfo;
 
   /// Contains all the state necessary for anti-dep breaking.
 class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
   public:
     /// Information about a register reference within a liverange
-    typedef struct {
+    struct RegisterReference {
       /// The registers operand
       MachineOperand *Operand;
+
       /// The register class
       const TargetRegisterClass *RC;
-    } RegisterReference;
+    };
 
   private:
     /// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
@@ -110,7 +116,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
 
   class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker
       : public AntiDepBreaker {
-    MachineFunction& MF;
+    MachineFunction &MF;
     MachineRegisterInfo &MRI;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
@@ -121,10 +127,10 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
     BitVector CriticalPathSet;
 
     /// The state used to identify and rename anti-dependence registers.
-    AggressiveAntiDepState *State;
+    AggressiveAntiDepState *State = nullptr;
 
   public:
-    AggressiveAntiDepBreaker(MachineFunction& MFi,
+    AggressiveAntiDepBreaker(MachineFunction &MFi,
                           const RegisterClassInfo &RCI,
                           TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
     ~AggressiveAntiDepBreaker() override;
@@ -134,8 +140,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
 
     /// Identifiy anti-dependencies along the critical path
     /// of the ScheduleDAG and break them by renaming registers.
-    ///
-    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+    unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
                                    MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex,
@@ -143,7 +148,6 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
 
     /// Update liveness information to account for the current
     /// instruction, which will not be scheduled.
-    ///
     void Observe(MachineInstr &MI, unsigned Count,
                  unsigned InsertPosIndex) override;
 
@@ -152,7 +156,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
 
   private:
     /// Keep track of a position in the allocation order for each regclass.
-    typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+    using RenameOrderType = std::map<const TargetRegisterClass *, unsigned>;
 
     /// Return true if MO represents a register
     /// that is both implicitly used and defined in MI
@@ -174,6 +178,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
                                    RenameOrderType& RenameOrder,
                                    std::map<unsigned, unsigned> &RenameMap);
   };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index d840a2f69ab3..8e8c1d8e08d1 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -31,18 +31,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
                                  const VirtRegMap &VRM,
                                  const RegisterClassInfo &RegClassInfo,
                                  const LiveRegMatrix *Matrix)
-  : Pos(0) {
+  : Pos(0), HardHints(false) {
   const MachineFunction &MF = VRM.getMachineFunction();
   const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
   Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
-  TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
+  if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix))
+    HardHints = true;
   rewind();
 
   DEBUG({
     if (!Hints.empty()) {
       dbgs() << "hints:";
       for (unsigned I = 0, E = Hints.size(); I != E; ++I)
-        dbgs() << ' ' << PrintReg(Hints[I], TRI);
+        dbgs() << ' ' << printReg(Hints[I], TRI);
       dbgs() << '\n';
     }
   });
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 8223a52e333b..467bcc2edc6f 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -32,7 +32,11 @@ class LLVM_LIBRARY_VISIBILITY AllocationOrder {
   ArrayRef<MCPhysReg> Order;
   int Pos;
 
+  // If HardHints is true, *only* Hints will be returned.
+  bool HardHints;
+
 public:
+
   /// Create a new AllocationOrder for VirtReg.
   /// @param VirtReg      Virtual register to allocate for.
   /// @param VRM          Virtual register map for function.
@@ -51,6 +55,8 @@ public:
   unsigned next(unsigned Limit = 0) {
     if (Pos < 0)
       return Hints.end()[Pos++];
+    if (HardHints)
+      return 0;
     if (!Limit)
       Limit = Order.size();
     while (Pos < int(Limit)) {
@@ -68,6 +74,8 @@ public:
   unsigned nextWithDups(unsigned Limit) {
     if (Pos < 0)
       return Hints.end()[Pos++];
+    if (HardHints)
+      return 0;
     if (Pos < int(Limit))
       return Order[Pos++];
     return 0;
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index c2aecc651b79..0731ae575437 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -14,7 +14,9 @@
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -24,9 +26,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 
 using namespace llvm;
@@ -565,6 +564,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
     return false;
 
   const Value *RetVal = Ret->getOperand(0), *CallVal = I;
+  // Intrinsic like llvm.memcpy has no return value, but the expanded
+  // libcall may or may not have return value. On most platforms, it
+  // will be expanded as memcpy in libc, which returns the first
+  // argument. On other platforms like arm-none-eabi, memcpy may be
+  // expanded as library call without return value, like __aeabi_memcpy.
+  const CallInst *Call = cast<CallInst>(I);
+  if (Function *F = Call->getCalledFunction()) {
+    Intrinsic::ID IID = F->getIntrinsicID();
+    if (((IID == Intrinsic::memcpy &&
+          TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
+         (IID == Intrinsic::memmove &&
+          TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
+         (IID == Intrinsic::memset &&
+          TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
+        RetVal == Call->getArgOperand(0))
+      return true;
+  }
+
   SmallVector<unsigned, 4> RetPath, CallPath;
   SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
 
@@ -651,7 +668,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
 
   int EntryBBNumber = MF.front().getNumber();
   bool IsSEH = isAsynchronousEHPersonality(
-      classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+      classifyEHPersonality(MF.getFunction().getPersonalityFn()));
 
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index d14d93100adb..181da83dc88b 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,12 +15,14 @@
 #ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
 #define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
 
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <utility>
 #include <vector>
 
 namespace llvm {
@@ -29,17 +31,17 @@ namespace llvm {
 /// registers to break register anti-dependencies (WAR hazards).
 class LLVM_LIBRARY_VISIBILITY AntiDepBreaker {
 public:
-  typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > 
-    DbgValueVector;
+  using DbgValueVector =
+      std::vector<std::pair<MachineInstr *, MachineInstr *>>;
 
   virtual ~AntiDepBreaker();
 
   /// Initialize anti-dep breaking for a new basic block.
-  virtual void StartBlock(MachineBasicBlock *BB) =0;
+  virtual void StartBlock(MachineBasicBlock *BB) = 0;
 
   /// Identifiy anti-dependencies within a basic-block region and break them by
   /// renaming registers. Return the number of anti-dependencies broken.
-  virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+  virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
                                          MachineBasicBlock::iterator Begin,
                                          MachineBasicBlock::iterator End,
                                          unsigned InsertPosIndex,
@@ -51,7 +53,7 @@ public:
                        unsigned InsertPosIndex) = 0;
 
   /// Finish anti-dep breaking for a basic block.
-  virtual void FinishBlock() =0;
+  virtual void FinishBlock() = 0;
 
   /// Update DBG_VALUE if dependency breaker is updating
   /// other machine instruction to use NewReg.
@@ -81,6 +83,6 @@ public:
   }
 };
 
-}
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 8b1376ab363d..15cfbd5c40ff 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -12,26 +12,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfException.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
@@ -67,16 +60,16 @@ void ARMException::beginFunction(const MachineFunction *MF) {
 ///
 void ARMException::endFunction(const MachineFunction *MF) {
   ARMTargetStreamer &ATS = getTargetStreamer();
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
   const Function *Per = nullptr;
-  if (F->hasPersonalityFn())
-    Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+  if (F.hasPersonalityFn())
+    Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
   bool forceEmitPersonality =
-    F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
-    F->needsUnwindTableEntry();
+    F.hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+    F.needsUnwindTableEntry();
   bool shouldEmitPersonality = forceEmitPersonality ||
     !MF->getLandingPads().empty();
-  if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
+  if (!Asm->MF->getFunction().needsUnwindTableEntry() &&
       !shouldEmitPersonality)
     ATS.emitCantUnwind();
   else if (shouldEmitPersonality) {
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index ec552e0640e9..59ed0324bdb0 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===//
+//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,9 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "AddressPool.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <utility>
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index ba3e3b7c315d..990a158d87cd 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===//
+//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,11 +11,13 @@
 #define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCSymbol.h"
 
 namespace llvm {
-class MCSection;
+
 class AsmPrinter;
+class MCSection;
+class MCSymbol;
+
 // Collection of addresses for this unit and assorted labels.
 // A Symbol->unsigned mapping of addresses used by indirect
 // references.
@@ -23,6 +25,7 @@ class AddressPool {
   struct AddressPoolEntry {
     unsigned Number;
     bool TLS;
+
     AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
   };
   DenseMap<const MCSymbol *, AddressPoolEntry> Pool;
@@ -31,10 +34,10 @@ class AddressPool {
   /// the last "resetUsedFlag" call. Used to implement type unit fallback - a
   /// type that references addresses cannot be placed in a type unit when using
   /// fission.
-  bool HasBeenUsed;
+  bool HasBeenUsed = false;
 
 public:
-  AddressPool() : HasBeenUsed(false) {}
+  AddressPool() = default;
 
   /// \brief Returns the index into the address pool with the given
   /// label/symbol.
@@ -48,5 +51,7 @@ public:
 
   void resetUsedFlag() { HasBeenUsed = false; }
 };
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index ff427c9a0d75..31037095aa2b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -29,10 +29,11 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/ObjectUtils.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/GCStrategy.h"
@@ -46,10 +47,19 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Comdat.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -62,14 +72,18 @@
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodePadder.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSection.h"
@@ -78,29 +92,28 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 #include <cassert>
 #include <cinttypes>
 #include <cstdint>
+#include <iterator>
 #include <limits>
 #include <memory>
 #include <string>
@@ -129,7 +142,8 @@ static cl::opt<bool>
 
 char AsmPrinter::ID = 0;
 
-typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
+using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>;
+
 static gcp_map_type &getGCMap(void *&P) {
   if (!P)
     P = new gcp_map_type();
@@ -184,7 +198,6 @@ bool AsmPrinter::isPositionIndependent() const {
 }
 
 /// getFunctionNumber - Return a unique ID for the current function.
-///
 unsigned AsmPrinter::getFunctionNumber() const {
   return MF->getFunctionNumber();
 }
@@ -221,8 +234,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineModuleInfo>();
   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
   AU.addRequired<GCModuleInfo>();
-  if (isVerbose())
-    AU.addRequired<MachineLoopInfo>();
+  AU.addRequired<MachineLoopInfo>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -242,28 +254,8 @@ bool AsmPrinter::doInitialization(Module &M) {
   // alternative is duplicated code in each of the target asm printers that
   // use the directive, where it would need the same conditionalization
   // anyway.
-  const Triple &TT = TM.getTargetTriple();
-  // If there is a version specified, Major will be non-zero.
-  if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
-    unsigned Major, Minor, Update;
-    MCVersionMinType VersionType;
-    if (TT.isWatchOS()) {
-      VersionType = MCVM_WatchOSVersionMin;
-      TT.getWatchOSVersion(Major, Minor, Update);
-    } else if (TT.isTvOS()) {
-      VersionType = MCVM_TvOSVersionMin;
-      TT.getiOSVersion(Major, Minor, Update);
-    } else if (TT.isMacOSX()) {
-      VersionType = MCVM_OSXVersionMin;
-      if (!TT.getMacOSXVersion(Major, Minor, Update))
-        Major = 0;
-    } else {
-      VersionType = MCVM_IOSVersionMin;
-      TT.getiOSVersion(Major, Minor, Update);
-    }
-    if (Major != 0)
-      OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
-  }
+  const Triple &Target = TM.getTargetTriple();
+  OutStreamer->EmitVersionForTarget(Target);
 
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
@@ -272,7 +264,8 @@ bool AsmPrinter::doInitialization(Module &M) {
   // don't, this at least helps the user find where a global came from.
   if (MAI->hasSingleParameterDotFile()) {
     // .file "foo.c"
-    OutStreamer->EmitFileDirective(M.getSourceFileName());
+    OutStreamer->EmitFileDirective(
+        llvm::sys::path::filename(M.getSourceFileName()));
   }
 
   GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -628,35 +621,35 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
 /// EmitFunctionHeader - This method emits the header for the current
 /// function.
 void AsmPrinter::EmitFunctionHeader() {
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
 
   if (isVerbose())
     OutStreamer->GetCommentOS()
         << "-- Begin function "
-        << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n';
+        << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
 
   // Print out constants referenced by the function
   EmitConstantPool();
 
   // Print the 'header' of function.
-  OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM));
-  EmitVisibility(CurrentFnSym, F->getVisibility());
+  OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM));
+  EmitVisibility(CurrentFnSym, F.getVisibility());
 
-  EmitLinkage(F, CurrentFnSym);
+  EmitLinkage(&F, CurrentFnSym);
   if (MAI->hasFunctionAlignment())
-    EmitAlignment(MF->getAlignment(), F);
+    EmitAlignment(MF->getAlignment(), &F);
 
   if (MAI->hasDotTypeDotSizeDirective())
     OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
 
   if (isVerbose()) {
-    F->printAsOperand(OutStreamer->GetCommentOS(),
-                   /*PrintType=*/false, F->getParent());
+    F.printAsOperand(OutStreamer->GetCommentOS(),
+                   /*PrintType=*/false, F.getParent());
     OutStreamer->GetCommentOS() << '\n';
   }
 
   // Emit the prefix data.
-  if (F->hasPrefixData()) {
+  if (F.hasPrefixData()) {
     if (MAI->hasSubsectionsViaSymbols()) {
       // Preserving prefix data on platforms which use subsections-via-symbols
       // is a bit tricky. Here we introduce a symbol for the prefix data
@@ -665,12 +658,12 @@ void AsmPrinter::EmitFunctionHeader() {
       MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
       OutStreamer->EmitLabel(PrefixSym);
 
-      EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+      EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
 
       // Emit an .alt_entry directive for the actual function symbol.
       OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
     } else {
-      EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+      EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData());
     }
   }
 
@@ -682,7 +675,7 @@ void AsmPrinter::EmitFunctionHeader() {
   // references to the dangling symbols.  Emit them at the start of the function
   // so that we don't get references to undefined symbols.
   std::vector<MCSymbol*> DeadBlockSyms;
-  MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+  MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
   for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
     OutStreamer->AddComment("Address taken block that was later removed");
     OutStreamer->EmitLabel(DeadBlockSyms[i]);
@@ -707,8 +700,8 @@ void AsmPrinter::EmitFunctionHeader() {
   }
 
   // Emit the prologue data.
-  if (F->hasPrologueData())
-    EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData());
+  if (F.hasPrologueData())
+    EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -729,9 +722,11 @@ void AsmPrinter::EmitFunctionEntryLabel() {
 }
 
 /// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
+/// It returns true iff the sched comment was emitted.
+///   Otherwise it returns false.
+static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
                          AsmPrinter *AP) {
-  const MachineFunction *MF = MI.getParent()->getParent();
+  const MachineFunction *MF = MI.getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
   // Check for spills and reloads
@@ -773,12 +768,16 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
     CommentOS << " Reload Reuse";
   }
 
-  if (Commented && AP->EnablePrintSchedInfo)
-    // If any comment was added above and we need sched info comment then
-    // add this new comment just after the above comment w/o "\n" between them.
-    CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
-  else if (Commented)
+  if (Commented) {
+    if (AP->EnablePrintSchedInfo) {
+      // If any comment was added above and we need sched info comment then add
+      // this new comment just after the above comment w/o "\n" between them.
+      CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
+      return true;
+    }
     CommentOS << "\n";
+  }
+  return false;
 }
 
 /// emitImplicitDef - This method emits the specified machine instruction
@@ -789,7 +788,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
   OS << "implicit-def: "
-     << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo());
+     << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
 
   OutStreamer->AddComment(OS.str());
   OutStreamer->AddBlankLine();
@@ -802,10 +801,8 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &Op = MI->getOperand(i);
     assert(Op.isReg() && "KILL instruction must have only register operands");
-    OS << ' '
-       << PrintReg(Op.getReg(),
-                   AP.MF->getSubtarget().getRegisterInfo())
-       << (Op.isDef() ? "<def>" : "<kill>");
+    OS << ' ' << (Op.isDef() ? "def " : "killed ")
+       << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
   }
   AP.OutStreamer->AddComment(OS.str());
   AP.OutStreamer->AddBlankLine();
@@ -890,7 +887,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
     }
     if (MemLoc)
       OS << '[';
-    OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
+    OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
   }
 
   if (MemLoc)
@@ -901,9 +898,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
   return true;
 }
 
-AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
   if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
-      MF->getFunction()->needsUnwindTableEntry())
+      MF->getFunction().needsUnwindTableEntry())
     return CFI_M_EH;
 
   if (MMI->hasDebugInfo())
@@ -913,7 +910,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
 }
 
 bool AsmPrinter::needsSEHMoves() {
-  return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry();
+  return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry();
 }
 
 void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
@@ -951,6 +948,31 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
                              MCConstantExpr::create(FrameOffset, OutContext));
 }
 
+void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
+  if (!MF.getTarget().Options.EmitStackSizeSection)
+    return;
+
+  MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection();
+  if (!StackSizeSection)
+    return;
+
+  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  // Don't emit functions with dynamic stack allocations.
+  if (FrameInfo.hasVarSizedObjects())
+    return;
+
+  OutStreamer->PushSection();
+  OutStreamer->SwitchSection(StackSizeSection);
+
+  const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction());
+  uint64_t StackSize = FrameInfo.getStackSize();
+  OutStreamer->EmitValue(MCSymbolRefExpr::create(FunctionSymbol, OutContext),
+                         /* size = */ 8);
+  OutStreamer->EmitULEB128IntValue(StackSize);
+
+  OutStreamer->PopSection();
+}
+
 static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
                                            MachineModuleInfo *MMI) {
   if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo())
@@ -958,10 +980,10 @@ static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
 
   // We might emit an EH table that uses function begin and end labels even if
   // we don't have any landingpads.
-  if (!MF.getFunction()->hasPersonalityFn())
+  if (!MF.getFunction().hasPersonalityFn())
     return false;
   return !isNoOpWithoutInvoke(
-      classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+      classifyEHPersonality(MF.getFunction().getPersonalityFn()));
 }
 
 /// EmitFunctionBody - This method emits the body and trailer for a
@@ -981,7 +1003,6 @@ void AsmPrinter::EmitFunctionBody() {
     // Print a label for the basic block.
     EmitBasicBlockStart(MBB);
     for (auto &MI : MBB) {
-
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
           !MI.isDebugValue()) {
@@ -998,18 +1019,18 @@ void AsmPrinter::EmitFunctionBody() {
         }
       }
 
-      if (isVerbose())
-        emitComments(MI, OutStreamer->GetCommentOS(), this);
+      if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
+        MachineInstr *MIP = const_cast<MachineInstr *>(&MI);
+        MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
+      }
 
       switch (MI.getOpcode()) {
       case TargetOpcode::CFI_INSTRUCTION:
         emitCFIInstruction(MI);
         break;
-
       case TargetOpcode::LOCAL_ESCAPE:
         emitFrameAlloc(MI);
         break;
-
       case TargetOpcode::EH_LABEL:
       case TargetOpcode::GC_LABEL:
         OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
@@ -1049,7 +1070,7 @@ void AsmPrinter::EmitFunctionBody() {
 
   EmittedInsts += NumInstsInFunction;
   MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount",
-                                      MF->getFunction()->getSubprogram(),
+                                      MF->getFunction().getSubprogram(),
                                       &MF->front());
   R << ore::NV("NumInstructions", NumInstsInFunction)
     << " instructions in function";
@@ -1077,8 +1098,8 @@ void AsmPrinter::EmitFunctionBody() {
     }
   }
 
-  const Function *F = MF->getFunction();
-  for (const auto &BB : *F) {
+  const Function &F = MF->getFunction();
+  for (const auto &BB : F) {
     if (!BB.hasAddressTaken())
       continue;
     MCSymbol *Sym = GetBlockAddressSymbol(&BB);
@@ -1125,6 +1146,9 @@ void AsmPrinter::EmitFunctionBody() {
     HI.Handler->endFunction(MF);
   }
 
+  // Emit section containing stack size metadata.
+  emitStackSizeSection(*MF);
+
   if (isVerbose())
     OutStreamer->GetCommentOS() << "-- End function\n";
 
@@ -1380,6 +1404,16 @@ bool AsmPrinter::doFinalization(Module &M) {
                                  PtrSize);
   }
 
+  // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
+  // split-stack is used.
+  if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) {
+    OutStreamer->SwitchSection(
+        OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0));
+    if (MMI->hasNosplitStack())
+      OutStreamer->SwitchSection(
+          OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
+  }
+
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable. Some targets have a directive to declare this.
   Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
@@ -1408,7 +1442,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
   // Get the function symbol.
-  CurrentFnSym = getSymbol(MF.getFunction());
+  CurrentFnSym = getSymbol(&MF.getFunction());
   CurrentFnSymForSize = CurrentFnSym;
   CurrentFnBegin = nullptr;
   CurExceptionSym = nullptr;
@@ -1420,8 +1454,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   }
 
   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
-  if (isVerbose())
-    LI = &getAnalysis<MachineLoopInfo>();
+  LI = &getAnalysis<MachineLoopInfo>();
 
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
@@ -1446,7 +1479,6 @@ namespace {
 /// representations of the constants in the constant pool MCP. This is
 /// used to print out constants which have been "spilled to memory" by
 /// the code generator.
-///
 void AsmPrinter::EmitConstantPool() {
   const MachineConstantPool *MCP = MF->getConstantPool();
   const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
@@ -1526,7 +1558,6 @@ void AsmPrinter::EmitConstantPool() {
 
 /// EmitJumpTableInfo - Print assembly representations of the jump tables used
 /// by the current function to the current output stream.
-///
 void AsmPrinter::EmitJumpTableInfo() {
   const DataLayout &DL = MF->getDataLayout();
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
@@ -1537,14 +1568,14 @@ void AsmPrinter::EmitJumpTableInfo() {
 
   // Pick the directive to use to print the jump table entries, and switch to
   // the appropriate section.
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
   const TargetLoweringObjectFile &TLOF = getObjFileLowering();
   bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
       MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
-      *F);
+      F);
   if (JTInDiffSection) {
     // Drop it in the readonly section.
-    MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM);
+    MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM);
     OutStreamer->SwitchSection(ReadOnlySection);
   }
 
@@ -1723,7 +1754,7 @@ struct Structor {
   Structor() = default;
 };
 
-}  // end anonymous namespace
+} // end anonymous namespace
 
 /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
 /// priority.
@@ -1818,13 +1849,11 @@ void AsmPrinter::EmitInt8(int Value) const {
 }
 
 /// EmitInt16 - Emit a short directive and value.
-///
 void AsmPrinter::EmitInt16(int Value) const {
   OutStreamer->EmitIntValue(Value, 2);
 }
 
 /// EmitInt32 - Emit a long directive and value.
-///
 void AsmPrinter::EmitInt32(int Value) const {
   OutStreamer->EmitIntValue(Value, 4);
 }
@@ -1866,7 +1895,6 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 // byte alignment.  If a global value is specified, and if that global has
 // an explicit alignment requested, it will override the alignment request
 // if required for correctness.
-//
 void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
   if (GV)
     NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
@@ -1921,7 +1949,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
       raw_string_ostream OS(S);
       OS << "Unsupported expression in static initializer: ";
       CE->printAsOperand(OS, /*PrintType=*/false,
-                     !MF ? nullptr : MF->getFunction()->getParent());
+                     !MF ? nullptr : MF->getFunction().getParent());
       report_fatal_error(OS.str());
     }
   case Instruction::GetElementPtr: {
@@ -2317,7 +2345,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   //
   //  cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
   //    gotpcrelcst := <offset from @foo base> + <cst>
-  //
   MCValue MV;
   if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
     return;
@@ -2348,7 +2375,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   // If gotpcrelcst is positive it means that we can safely fold the pc rel
   // displacement into the GOTPCREL. We can also can have an extra offset <cst>
   // if the target knows how to encode it.
-  //
   int64_t GOTPCRelCst = Offset + MV.getConstant();
   if (GOTPCRelCst < 0)
     return;
@@ -2370,7 +2396,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   //    .long 42
   //  foo:
   //    .long bar@GOTPCREL+<gotpcrelcst>
-  //
   AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
   const GlobalVariable *GV = Result.first;
   int NumUses = (int)Result.second;
@@ -2550,7 +2575,6 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
     << " Depth=" << Loop->getLoopDepth() << '\n';
 }
 
-
 /// PrintChildLoopComment - Print comments about child loops within
 /// the loop for this basic block, with nesting.
 static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
@@ -2603,6 +2627,23 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
   PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
 }
 
+void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
+                                         MCCodePaddingContext &Context) const {
+  assert(MF != nullptr && "Machine function must be valid");
+  assert(LI != nullptr && "Loop info must be valid");
+  Context.IsPaddingActive = !MF->hasInlineAsm() &&
+                            !MF->getFunction().optForSize() &&
+                            TM.getOptLevel() != CodeGenOpt::None;
+  const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB);
+  Context.IsBasicBlockInsideInnermostLoop =
+      CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty();
+  Context.IsBasicBlockReachableViaFallthrough =
+      std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
+      MBB.pred_end();
+  Context.IsBasicBlockReachableViaBranch =
+      MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB);
+}
+
 /// EmitBasicBlockStart - This method prints the label for the specified
 /// MachineBasicBlock, an alignment (if present) and a comment describing
 /// it if appropriate.
@@ -2618,6 +2659,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
   // Emit an alignment directive for this block, if needed.
   if (unsigned Align = MBB.getAlignment())
     EmitAlignment(Align);
+  MCCodePaddingContext Context;
+  setupCodePaddingContext(MBB, Context);
+  OutStreamer->EmitCodePaddingBasicBlockStart(Context);
 
   // If the block has its address taken, emit any labels that were used to
   // reference the block.  It is possible that there is more than one label
@@ -2652,13 +2696,20 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
       (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
     if (isVerbose()) {
       // NOTE: Want this comment at start of line, don't emit with AddComment.
-      OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
+      OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
+                                  false);
     }
   } else {
     OutStreamer->EmitLabel(MBB.getSymbol());
   }
 }
 
+void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {
+  MCCodePaddingContext Context;
+  setupCodePaddingContext(MBB, Context);
+  OutStreamer->EmitCodePaddingBasicBlockEnd(Context);
+}
+
 void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
                                 bool IsDefinition) const {
   MCSymbolAttr Attr = MCSA_Invalid;
@@ -2765,10 +2816,13 @@ void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
   Out->EmitSymbolValue(Sled, Bytes);
   Out->EmitSymbolValue(CurrentFnSym, Bytes);
   auto Kind8 = static_cast<uint8_t>(Kind);
-  Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
-  Out->EmitBytes(
+  Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
+  Out->EmitBinaryData(
       StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
-  Out->EmitZeros(2 * Bytes - 2);  // Pad the previous two entries
+  Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1));
+  auto Padding = (4 * Bytes) - ((2 * Bytes) + 3);
+  assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size");
+  Out->EmitZeros(Padding);
 }
 
 void AsmPrinter::emitXRayTable() {
@@ -2776,23 +2830,26 @@ void AsmPrinter::emitXRayTable() {
     return;
 
   auto PrevSection = OutStreamer->getCurrentSectionOnly();
-  auto Fn = MF->getFunction();
+  const Function &F = MF->getFunction();
   MCSection *InstMap = nullptr;
   MCSection *FnSledIndex = nullptr;
   if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
-    if (Fn->hasComdat()) {
-      InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-                                         ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
-                                         Fn->getComdat()->getName());
-      FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS,
-                                             ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
-                                             Fn->getComdat()->getName());
-    } else {
-      InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
-                                         ELF::SHF_ALLOC);
-      FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS,
-                                             ELF::SHF_ALLOC);
+    auto Associated = dyn_cast<MCSymbolELF>(CurrentFnSym);
+    assert(Associated != nullptr);
+    auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
+    std::string GroupName;
+    if (F.hasComdat()) {
+      Flags |= ELF::SHF_GROUP;
+      GroupName = F.getComdat()->getName();
     }
+
+    auto UniqueID = ++XRayFnUniqueID;
+    InstMap =
+        OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0,
+                                 GroupName, UniqueID, Associated);
+    FnSledIndex =
+        OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0,
+                                 GroupName, UniqueID, Associated);
   } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
     InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
                                          SectionKind::getReadOnlyWithRel());
@@ -2802,15 +2859,7 @@ void AsmPrinter::emitXRayTable() {
     llvm_unreachable("Unsupported target");
   }
 
-  // Before we switch over, we force a reference to a label inside the
-  // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept
-  // live by the linker if the function is not garbage-collected. Since this
-  // function is always called just before the function's end, we assume that
-  // this is happening after the last return instruction.
   auto WordSizeBytes = MAI->getCodePointerSize();
-  MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true);
-  OutStreamer->EmitCodeAlignment(16);
-  OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false);
 
   // Now we switch to the instrumentation map section. Because this is done
   // per-function, we are able to create an index entry that will represent the
@@ -2829,24 +2878,23 @@ void AsmPrinter::emitXRayTable() {
   // pointers. This should work for both 32-bit and 64-bit platforms.
   OutStreamer->SwitchSection(FnSledIndex);
   OutStreamer->EmitCodeAlignment(2 * WordSizeBytes);
-  OutStreamer->EmitLabel(IdxRef);
-  OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes);
-  OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes);
+  OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false);
+  OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false);
   OutStreamer->SwitchSection(PrevSection);
   Sleds.clear();
 }
 
 void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
-  SledKind Kind) {
-  auto Fn = MI.getParent()->getParent()->getFunction();
-  auto Attr = Fn->getFnAttribute("function-instrument");
-  bool LogArgs = Fn->hasFnAttribute("xray-log-args");
+                            SledKind Kind, uint8_t Version) {
+  const Function &F = MI.getMF()->getFunction();
+  auto Attr = F.getFnAttribute("function-instrument");
+  bool LogArgs = F.hasFnAttribute("xray-log-args");
   bool AlwaysInstrument =
     Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
   if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
     Kind = SledKind::LOG_ARGS_ENTER;
-  Sleds.emplace_back(
-    XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
+  Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind,
+                                       AlwaysInstrument, &F, Version});
 }
 
 uint16_t AsmPrinter::getDwarfVersion() const {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 0edf9051d342..08eb14e242c5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,14 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "ByteStreamer.h"
-#include "DwarfDebug.h"
-#include "DwarfExpression.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -28,9 +26,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
@@ -48,12 +44,19 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
 }
 
 /// EmitULEB128 - emit the specified unsigned leb128 value.
-void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
-                             unsigned PadTo) const {
+void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo,
+                                   const char *Desc) const {
   if (isVerbose() && Desc)
     OutStreamer->AddComment(Desc);
 
-  OutStreamer->EmitULEB128IntValue(Value, PadTo);
+  OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo);
+}
+
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->EmitULEB128IntValue(Value);
 }
 
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
@@ -212,6 +215,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
   case MCCFIInstruction::OpEscape:
     OutStreamer->EmitCFIEscape(Inst.getValues());
     break;
+  case MCCFIInstruction::OpRestore:
+    OutStreamer->EmitCFIRestore(Inst.getRegister());
+    break;
   }
 }
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index eae79ad101d3..04a72ba3d738 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -17,6 +17,9 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
@@ -32,10 +35,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
@@ -144,6 +144,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
                        " we don't have an asm parser for this target\n");
   Parser->setAssemblerDialect(Dialect);
   Parser->setTargetParser(*TAP.get());
+  Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
   if (Dialect == InlineAsm::AD_Intel)
     // We need this flag to be able to parse numbers like "0bH"
     Parser->setParsingInlineAsm(true);
@@ -513,7 +514,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
   // Reset SanitizeAddress based on the function's attribute.
   MCTargetOptions MCOptions = TM.Options.MCOptions;
   MCOptions.SanitizeAddress =
-      MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress);
+      MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
 
   EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
                 MI->getInlineAsmDialect());
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index a81d56e9618b..1d0a003dc50a 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeViewDebug.h"
+#include "DwarfExpression.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -19,9 +20,9 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/ADT/Triple.h"
@@ -34,9 +35,14 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -61,17 +67,15 @@
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/SMLoc.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cctype>
@@ -86,6 +90,9 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
+static cl::opt<bool> EmitDebugGlobalHashes("emit-codeview-ghash-section",
+                                           cl::ReallyHidden, cl::init(false));
+
 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
     : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
   // If module doesn't have named metadata anchors or COFF debug section
@@ -153,12 +160,19 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
 }
 
 unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
+  StringRef FullPath = getFullFilepath(F);
   unsigned NextId = FileIdMap.size() + 1;
-  auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
+  auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId));
   if (Insertion.second) {
     // We have to compute the full filepath and emit a .cv_file directive.
-    StringRef FullPath = getFullFilepath(F);
-    bool Success = OS.EmitCVFileDirective(NextId, FullPath);
+    std::string Checksum = fromHex(F->getChecksum());
+    void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
+    memcpy(CKMem, Checksum.data(), Checksum.size());
+    ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
+                                      Checksum.size());
+    DIFile::ChecksumKind ChecksumKind = F->getChecksumKind();
+    bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
+                                          static_cast<unsigned>(ChecksumKind));
     (void)Success;
     assert(Success && ".cv_file directive failed");
   }
@@ -270,7 +284,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
   // Build the fully qualified name of the scope.
   std::string ScopeName = getFullyQualifiedName(Scope);
   StringIdRecord SID(TypeIndex(), ScopeName);
-  auto TI = TypeTable.writeKnownType(SID);
+  auto TI = TypeTable.writeLeafType(SID);
   return recordTypeIndexForDINode(Scope, TI);
 }
 
@@ -295,12 +309,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
     TypeIndex ClassType = getTypeIndex(Class);
     MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
                                DisplayName);
-    TI = TypeTable.writeKnownType(MFuncId);
+    TI = TypeTable.writeLeafType(MFuncId);
   } else {
     // Otherwise, this must be a free function.
     TypeIndex ParentScope = getScopeIndex(Scope);
     FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
-    TI = TypeTable.writeKnownType(FuncId);
+    TI = TypeTable.writeLeafType(FuncId);
   }
 
   return recordTypeIndexForDINode(SP, TI);
@@ -324,8 +338,9 @@ TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
   // function type, as the complete class type is likely to reference this
   // member function type.
   TypeLoweringScope S(*this);
-  TypeIndex TI =
-      lowerTypeMemberFunction(SP->getType(), Class, SP->getThisAdjustment());
+  const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0;
+  TypeIndex TI = lowerTypeMemberFunction(
+      SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod);
   return recordTypeIndexForDINode(SP, TI, Class);
 }
 
@@ -476,10 +491,13 @@ void CodeViewDebug::endModule() {
   OS.AddComment("String table");
   OS.EmitCVStringTableDirective();
 
-  // Emit type information last, so that any types we translate while emitting
-  // function info are included.
+  // Emit type information and hashes last, so that any types we translate while
+  // emitting function info are included.
   emitTypeInformation();
 
+  if (EmitDebugGlobalHashes)
+    emitTypeGlobalHashes();
+
   clear();
 }
 
@@ -496,11 +514,6 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
 }
 
 void CodeViewDebug::emitTypeInformation() {
-  // Do nothing if we have no debug info or if no non-trivial types were emitted
-  // to TypeTable during codegen.
-  NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
-  if (!CU_Nodes)
-    return;
   if (TypeTable.empty())
     return;
 
@@ -545,7 +558,39 @@ void CodeViewDebug::emitTypeInformation() {
   }
 }
 
-namespace {
+void CodeViewDebug::emitTypeGlobalHashes() {
+  if (TypeTable.empty())
+    return;
+
+  // Start the .debug$H section with the version and hash algorithm, currently
+  // hardcoded to version 0, SHA1.
+  OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
+
+  OS.EmitValueToAlignment(4);
+  OS.AddComment("Magic");
+  OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4);
+  OS.AddComment("Section Version");
+  OS.EmitIntValue(0, 2);
+  OS.AddComment("Hash Algorithm");
+  OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2);
+
+  TypeIndex TI(TypeIndex::FirstNonSimpleIndex);
+  for (const auto &GHR : TypeTable.hashes()) {
+    if (OS.isVerboseAsm()) {
+      // Emit an EOL-comment describing which TypeIndex this hash corresponds
+      // to, as well as the stringified SHA1 hash.
+      SmallString<32> Comment;
+      raw_svector_ostream CommentOS(Comment);
+      CommentOS << formatv("{0:X+} [{1}]", TI.getIndex(), GHR);
+      OS.AddComment(Comment);
+      ++TI;
+    }
+    assert(GHR.Hash.size() % 20 == 0);
+    StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()),
+                GHR.Hash.size());
+    OS.EmitBinaryData(S);
+  }
+}
 
 static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
   switch (DWLang) {
@@ -572,6 +617,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
     return SourceLanguage::Cobol;
   case dwarf::DW_LANG_Java:
     return SourceLanguage::Java;
+  case dwarf::DW_LANG_D:
+    return SourceLanguage::D;
   default:
     // There's no CodeView representation for this language, and CV doesn't
     // have an "unknown" option for the language field, so we'll use MASM,
@@ -580,9 +627,11 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
   }
 }
 
+namespace {
 struct Version {
   int Part[4];
 };
+} // end anonymous namespace
 
 // Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out
 // the version number.
@@ -605,20 +654,19 @@ static Version parseVersion(StringRef Name) {
 
 static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
   switch (Type) {
-    case Triple::ArchType::x86:
-      return CPUType::Pentium3;
-    case Triple::ArchType::x86_64:
-      return CPUType::X64;
-    case Triple::ArchType::thumb:
-      return CPUType::Thumb;
-    default:
-      report_fatal_error("target architecture doesn't map to a CodeView "
-                         "CPUType");
+  case Triple::ArchType::x86:
+    return CPUType::Pentium3;
+  case Triple::ArchType::x86_64:
+    return CPUType::X64;
+  case Triple::ArchType::thumb:
+    return CPUType::Thumb;
+  case Triple::ArchType::aarch64:
+    return CPUType::ARM64;
+  default:
+    report_fatal_error("target architecture doesn't map to a CodeView CPUType");
   }
 }
 
-} // end anonymous namespace
-
 void CodeViewDebug::emitCompilerInformation() {
   MCContext &Context = MMI->getContext();
   MCSymbol *CompilerBegin = Context.createTempSymbol(),
@@ -678,8 +726,10 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
   OS.AddComment("Inlinee lines subsection");
   MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines);
 
-  // We don't provide any extra file info.
-  // FIXME: Find out if debuggers use this info.
+  // We emit the checksum info for files.  This is used by debuggers to
+  // determine if a pdb matches the source before loading it.  Visual Studio,
+  // for instance, will display a warning that the breakpoints are not valid if
+  // the pdb does not match the source.
   OS.AddComment("Inlinee lines signature");
   OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4);
 
@@ -692,13 +742,10 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
     OS.AddComment("Inlined function " + SP->getName() + " starts at " +
                   SP->getFilename() + Twine(':') + Twine(SP->getLine()));
     OS.AddBlankLine();
-    // The filechecksum table uses 8 byte entries for now, and file ids start at
-    // 1.
-    unsigned FileOffset = (FileId - 1) * 8;
     OS.AddComment("Type index of inlined function");
     OS.EmitIntValue(InlineeIdx.getIndex(), 4);
     OS.AddComment("Offset into filechecksum table");
-    OS.EmitIntValue(FileOffset, 4);
+    OS.EmitCVFileChecksumOffsetDirective(FileId);
     OS.AddComment("Starting line number");
     OS.EmitIntValue(SP->getLine(), 4);
   }
@@ -799,6 +846,10 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
   if (FuncName.empty())
     FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
 
+  // Emit FPO data, but only on 32-bit x86. No other platforms use it.
+  if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86)
+    OS.EmitCVFPOData(Fn);
+
   // Emit a symbol subsection, required by VS2012+ to find function boundaries.
   OS.AddComment("Symbol subsection for " + Twine(FuncName));
   MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
@@ -858,6 +909,30 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
       emitInlinedCallSite(FI, InlinedAt, I->second);
     }
 
+    for (auto Annot : FI.Annotations) {
+      MCSymbol *Label = Annot.first;
+      MDTuple *Strs = cast<MDTuple>(Annot.second);
+      MCSymbol *AnnotBegin = MMI->getContext().createTempSymbol(),
+               *AnnotEnd = MMI->getContext().createTempSymbol();
+      OS.AddComment("Record length");
+      OS.emitAbsoluteSymbolDiff(AnnotEnd, AnnotBegin, 2);
+      OS.EmitLabel(AnnotBegin);
+      OS.AddComment("Record kind: S_ANNOTATION");
+      OS.EmitIntValue(SymbolKind::S_ANNOTATION, 2);
+      OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
+      // FIXME: Make sure we don't overflow the max record size.
+      OS.EmitCOFFSectionIndex(Label);
+      OS.EmitIntValue(Strs->getNumOperands(), 2);
+      for (Metadata *MD : Strs->operands()) {
+        // MDStrings are null terminated, so we can do EmitBytes and get the
+        // nice .asciz directive.
+        StringRef Str = cast<MDString>(MD)->getString();
+        assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString");
+        OS.EmitBytes(StringRef(Str.data(), Str.size() + 1));
+      }
+      OS.EmitLabel(AnnotEnd);
+    }
+
     if (SP != nullptr)
       emitDebugInfoForUDTs(LocalUDTs);
 
@@ -947,13 +1022,110 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
   }
 }
 
+static bool canUseReferenceType(const DbgVariableLocation &Loc) {
+  return !Loc.LoadChain.empty() && Loc.LoadChain.back() == 0;
+}
+
+static bool needsReferenceType(const DbgVariableLocation &Loc) {
+  return Loc.LoadChain.size() == 2 && Loc.LoadChain.back() == 0;
+}
+
+void CodeViewDebug::calculateRanges(
+    LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) {
+  const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
+
+  // Calculate the definition ranges.
+  for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+    const InsnRange &Range = *I;
+    const MachineInstr *DVInst = Range.first;
+    assert(DVInst->isDebugValue() && "Invalid History entry");
+    // FIXME: Find a way to represent constant variables, since they are
+    // relatively common.
+    Optional<DbgVariableLocation> Location =
+        DbgVariableLocation::extractFromMachineInstruction(*DVInst);
+    if (!Location)
+      continue;
+
+    // CodeView can only express variables in register and variables in memory
+    // at a constant offset from a register. However, for variables passed
+    // indirectly by pointer, it is common for that pointer to be spilled to a
+    // stack location. For the special case of one offseted load followed by a
+    // zero offset load (a pointer spilled to the stack), we change the type of
+    // the local variable from a value type to a reference type. This tricks the
+    // debugger into doing the load for us.
+    if (Var.UseReferenceType) {
+      // We're using a reference type. Drop the last zero offset load.
+      if (canUseReferenceType(*Location))
+        Location->LoadChain.pop_back();
+      else
+        continue;
+    } else if (needsReferenceType(*Location)) {
+      // This location can't be expressed without switching to a reference type.
+      // Start over using that.
+      Var.UseReferenceType = true;
+      Var.DefRanges.clear();
+      calculateRanges(Var, Ranges);
+      return;
+    }
+
+    // We can only handle a register or an offseted load of a register.
+    if (Location->Register == 0 || Location->LoadChain.size() > 1)
+      continue;
+    {
+      LocalVarDefRange DR;
+      DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
+      DR.InMemory = !Location->LoadChain.empty();
+      DR.DataOffset =
+          !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
+      if (Location->FragmentInfo) {
+        DR.IsSubfield = true;
+        DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
+      } else {
+        DR.IsSubfield = false;
+        DR.StructOffset = 0;
+      }
+
+      if (Var.DefRanges.empty() ||
+          Var.DefRanges.back().isDifferentLocation(DR)) {
+        Var.DefRanges.emplace_back(std::move(DR));
+      }
+    }
+
+    // Compute the label range.
+    const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+    const MCSymbol *End = getLabelAfterInsn(Range.second);
+    if (!End) {
+      // This range is valid until the next overlapping bitpiece. In the
+      // common case, ranges will not be bitpieces, so they will overlap.
+      auto J = std::next(I);
+      const DIExpression *DIExpr = DVInst->getDebugExpression();
+      while (J != E &&
+             !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
+        ++J;
+      if (J != E)
+        End = getLabelBeforeInsn(J->first);
+      else
+        End = Asm->getFunctionEnd();
+    }
+
+    // If the last range end is our begin, just extend the last range.
+    // Otherwise make a new range.
+    SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R =
+        Var.DefRanges.back().Ranges;
+    if (!R.empty() && R.back().second == Begin)
+      R.back().second = End;
+    else
+      R.emplace_back(Begin, End);
+
+    // FIXME: Do more range combining.
+  }
+}
+
 void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
   DenseSet<InlinedVariable> Processed;
   // Grab the variable info that was squirreled away in the MMI side-table.
   collectVariableInfoFromMFTable(Processed);
 
-  const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
-
   for (const auto &I : DbgValues) {
     InlinedVariable IV = I.first;
     if (Processed.count(IV))
@@ -976,87 +1148,15 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
     LocalVariable Var;
     Var.DIVar = DIVar;
 
-    // Calculate the definition ranges.
-    for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
-      const InsnRange &Range = *I;
-      const MachineInstr *DVInst = Range.first;
-      assert(DVInst->isDebugValue() && "Invalid History entry");
-      const DIExpression *DIExpr = DVInst->getDebugExpression();
-      bool IsSubfield = false;
-      unsigned StructOffset = 0;
-
-      // Handle fragments.
-      auto Fragment = DIExpr->getFragmentInfo();
-      if (Fragment) {
-        IsSubfield = true;
-        StructOffset = Fragment->OffsetInBits / 8;
-      } else if (DIExpr->getNumElements() > 0) {
-        continue; // Ignore unrecognized exprs.
-      }
-
-      // Bail if operand 0 is not a valid register. This means the variable is a
-      // simple constant, or is described by a complex expression.
-      // FIXME: Find a way to represent constant variables, since they are
-      // relatively common.
-      unsigned Reg =
-          DVInst->getOperand(0).isReg() ? DVInst->getOperand(0).getReg() : 0;
-      if (Reg == 0)
-        continue;
-
-      // Handle the two cases we can handle: indirect in memory and in register.
-      unsigned CVReg = TRI->getCodeViewRegNum(Reg);
-      bool InMemory = DVInst->getOperand(1).isImm();
-      int Offset = InMemory ? DVInst->getOperand(1).getImm() : 0;
-      {
-        LocalVarDefRange DR;
-        DR.CVRegister = CVReg;
-        DR.InMemory = InMemory;
-        DR.DataOffset = Offset;
-        DR.IsSubfield = IsSubfield;
-        DR.StructOffset = StructOffset;
-
-        if (Var.DefRanges.empty() ||
-            Var.DefRanges.back().isDifferentLocation(DR)) {
-          Var.DefRanges.emplace_back(std::move(DR));
-        }
-      }
-
-      // Compute the label range.
-      const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
-      const MCSymbol *End = getLabelAfterInsn(Range.second);
-      if (!End) {
-        // This range is valid until the next overlapping bitpiece. In the
-        // common case, ranges will not be bitpieces, so they will overlap.
-        auto J = std::next(I);
-        while (J != E &&
-               !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
-          ++J;
-        if (J != E)
-          End = getLabelBeforeInsn(J->first);
-        else
-          End = Asm->getFunctionEnd();
-      }
-
-      // If the last range end is our begin, just extend the last range.
-      // Otherwise make a new range.
-      SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &Ranges =
-          Var.DefRanges.back().Ranges;
-      if (!Ranges.empty() && Ranges.back().second == Begin)
-        Ranges.back().second = End;
-      else
-        Ranges.emplace_back(Begin, End);
-
-      // FIXME: Do more range combining.
-    }
-
+    calculateRanges(Var, Ranges);
     recordLocalVariable(std::move(Var), InlinedAt);
   }
 }
 
 void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
-  const Function *GV = MF->getFunction();
-  assert(FnDebugInfo.count(GV) == false);
-  CurFn = &FnDebugInfo[GV];
+  const Function &GV = MF->getFunction();
+  assert(FnDebugInfo.count(&GV) == false);
+  CurFn = &FnDebugInfo[&GV];
   CurFn->FuncId = NextFuncId++;
   CurFn->Begin = Asm->getFunctionBegin();
 
@@ -1087,10 +1187,40 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
   }
 }
 
-void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
+static bool shouldEmitUdt(const DIType *T) {
+  if (!T)
+    return false;
+
+  // MSVC does not emit UDTs for typedefs that are scoped to classes.
+  if (T->getTag() == dwarf::DW_TAG_typedef) {
+    if (DIScope *Scope = T->getScope().resolve()) {
+      switch (Scope->getTag()) {
+      case dwarf::DW_TAG_structure_type:
+      case dwarf::DW_TAG_class_type:
+      case dwarf::DW_TAG_union_type:
+        return false;
+      }
+    }
+  }
+
+  while (true) {
+    if (!T || T->isForwardDecl())
+      return false;
+
+    const DIDerivedType *DT = dyn_cast<DIDerivedType>(T);
+    if (!DT)
+      return true;
+    T = DT->getBaseType().resolve();
+  }
+  return true;
+}
+
+void CodeViewDebug::addToUDTs(const DIType *Ty) {
   // Don't record empty UDTs.
   if (Ty->getName().empty())
     return;
+  if (!shouldEmitUdt(Ty))
+    return;
 
   SmallVector<StringRef, 5> QualifiedNameComponents;
   const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
@@ -1099,10 +1229,11 @@ void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
   std::string FullyQualifiedName =
       getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
 
-  if (ClosestSubprogram == nullptr)
-    GlobalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
-  else if (ClosestSubprogram == CurrentSubprogram)
-    LocalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+  if (ClosestSubprogram == nullptr) {
+    GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
+  } else if (ClosestSubprogram == CurrentSubprogram) {
+    LocalUDTs.emplace_back(std::move(FullyQualifiedName), Ty);
+  }
 
   // TODO: What if the ClosestSubprogram is neither null or the current
   // subprogram?  Currently, the UDT just gets dropped on the floor.
@@ -1139,7 +1270,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
       // The member function type of a member function pointer has no
       // ThisAdjustment.
       return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy,
-                                     /*ThisAdjustment=*/0);
+                                     /*ThisAdjustment=*/0,
+                                     /*IsStaticMethod=*/false);
     }
     return lowerTypeFunction(cast<DISubroutineType>(Ty));
   case dwarf::DW_TAG_enumeration_type:
@@ -1160,7 +1292,7 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
   TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
   StringRef TypeName = Ty->getName();
 
-  addToUDTs(Ty, UnderlyingTypeIndex);
+  addToUDTs(Ty);
 
   if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) &&
       TypeName == "HRESULT")
@@ -1193,11 +1325,12 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
            "codeview doesn't support subranges with lower bounds");
     int64_t Count = Subrange->getCount();
 
-    // Variable Length Array (VLA) has Count equal to '-1'.
-    // Replace with Count '1', assume it is the minimum VLA length.
-    // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
+    // Forward declarations of arrays without a size and VLAs use a count of -1.
+    // Emit a count of zero in these cases to match what MSVC does for arrays
+    // without a size. MSVC doesn't support VLAs, so it's not clear what we
+    // should do for them even if we could distinguish them.
     if (Count == -1)
-      Count = 1;
+      Count = 0;
 
     // Update the element size and element type index for subsequent subranges.
     ElementSize *= Count;
@@ -1209,7 +1342,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
 
     StringRef Name = (i == 0) ? Ty->getName() : "";
     ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name);
-    ElementTypeIndex = TypeTable.writeKnownType(AR);
+    ElementTypeIndex = TypeTable.writeLeafType(AR);
   }
 
   return ElementTypeIndex;
@@ -1342,7 +1475,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
   // do.
   PointerOptions PO = PointerOptions::None;
   PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
-  return TypeTable.writeKnownType(PR);
+  return TypeTable.writeLeafType(PR);
 }
 
 static PointerToMemberRepresentation
@@ -1393,7 +1526,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
   MemberPointerInfo MPI(
       ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
   PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
-  return TypeTable.writeKnownType(PR);
+  return TypeTable.writeLeafType(PR);
 }
 
 /// Given a DWARF calling convention, get the CodeView equivalent. If we don't
@@ -1432,7 +1565,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
   }
   TypeIndex ModifiedTI = getTypeIndex(BaseTy);
   ModifierRecord MR(ModifiedTI, Mods);
-  return TypeTable.writeKnownType(MR);
+  return TypeTable.writeLeafType(MR);
 }
 
 TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
@@ -1449,18 +1582,19 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
   }
 
   ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
-  TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
+  TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
 
   CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
 
   ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
                             ArgTypeIndices.size(), ArgListIndex);
-  return TypeTable.writeKnownType(Procedure);
+  return TypeTable.writeLeafType(Procedure);
 }
 
 TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
                                                  const DIType *ClassTy,
-                                                 int ThisAdjustment) {
+                                                 int ThisAdjustment,
+                                                 bool IsStaticMethod) {
   // Lower the containing class type.
   TypeIndex ClassType = getTypeIndex(ClassTy);
 
@@ -1475,26 +1609,22 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
     ReturnTypeIndex = ReturnAndArgTypesRef.front();
     ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
   }
-  TypeIndex ThisTypeIndex = TypeIndex::Void();
-  if (!ArgTypeIndices.empty()) {
+  TypeIndex ThisTypeIndex;
+  if (!IsStaticMethod && !ArgTypeIndices.empty()) {
     ThisTypeIndex = ArgTypeIndices.front();
     ArgTypeIndices = ArgTypeIndices.drop_front();
   }
 
   ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
-  TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
+  TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec);
 
   CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
 
-  // TODO: Need to use the correct values for:
-  //       FunctionOptions
-  //       ThisPointerAdjustment.
+  // TODO: Need to use the correct values for FunctionOptions.
   MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
                            FunctionOptions::None, ArgTypeIndices.size(),
                            ArgListIndex, ThisAdjustment);
-  TypeIndex TI = TypeTable.writeKnownType(MFR);
-
-  return TI;
+  return TypeTable.writeLeafType(MFR);
 }
 
 TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
@@ -1503,7 +1633,7 @@ TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
   SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
 
   VFTableShapeRecord VFTSR(Slots);
-  return TypeTable.writeKnownType(VFTSR);
+  return TypeTable.writeLeafType(VFTSR);
 }
 
 static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
@@ -1530,6 +1660,9 @@ static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) {
 
 static MethodKind translateMethodKindFlags(const DISubprogram *SP,
                                            bool Introduced) {
+  if (SP->getFlags() & DINode::FlagStaticMember)
+    return MethodKind::Static;
+
   switch (SP->getVirtuality()) {
   case dwarf::DW_VIRTUALITY_none:
     break;
@@ -1542,8 +1675,6 @@ static MethodKind translateMethodKindFlags(const DISubprogram *SP,
     llvm_unreachable("unhandled virtuality case");
   }
 
-  // FIXME: Get Clang to mark DISubprogram as static and do something with it.
-
   return MethodKind::Vanilla;
 }
 
@@ -1593,9 +1724,8 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
   if (Ty->isForwardDecl()) {
     CO |= ClassOptions::ForwardReference;
   } else {
-    FieldListRecordBuilder FLRB(TypeTable);
-
-    FLRB.begin();
+    ContinuationRecordBuilder ContinuationBuilder;
+    ContinuationBuilder.begin(ContinuationRecordKind::FieldList);
     for (const DINode *Element : Ty->getElements()) {
       // We assume that the frontend provides all members in source declaration
       // order, which is what MSVC does.
@@ -1603,18 +1733,18 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
         EnumeratorRecord ER(MemberAccess::Public,
                             APSInt::getUnsigned(Enumerator->getValue()),
                             Enumerator->getName());
-        FLRB.writeMemberType(ER);
+        ContinuationBuilder.writeMemberType(ER);
         EnumeratorCount++;
       }
     }
-    FTI = FLRB.end(true);
+    FTI = TypeTable.insertRecord(ContinuationBuilder);
   }
 
   std::string FullName = getFullyQualifiedName(Ty);
 
   EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),
                 getTypeIndex(Ty->getBaseType()));
-  return TypeTable.writeKnownType(ER);
+  return TypeTable.writeLeafType(ER);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1643,7 +1773,7 @@ struct llvm::ClassInfo {
 
   TypeIndex VShapeTI;
 
-  std::vector<const DICompositeType *> NestedClasses;
+  std::vector<const DIType *> NestedTypes;
 };
 
 void CodeViewDebug::clear() {
@@ -1694,12 +1824,14 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
       } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type &&
                  DDTy->getName() == "__vtbl_ptr_type") {
         Info.VShapeTI = getTypeIndex(DDTy);
+      } else if (DDTy->getTag() == dwarf::DW_TAG_typedef) {
+        Info.NestedTypes.push_back(DDTy);
       } else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
         // Ignore friend members. It appears that MSVC emitted info about
         // friends in the past, but modern versions do not.
       }
     } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
-      Info.NestedClasses.push_back(Composite);
+      Info.NestedTypes.push_back(Composite);
     }
     // Skip other unrecognized kinds of elements.
   }
@@ -1715,7 +1847,7 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
   std::string FullName = getFullyQualifiedName(Ty);
   ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0,
                  FullName, Ty->getIdentifier());
-  TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR);
+  TypeIndex FwdDeclTI = TypeTable.writeLeafType(CR);
   if (!Ty->isForwardDecl())
     DeferredCompleteTypes.push_back(Ty);
   return FwdDeclTI;
@@ -1741,16 +1873,17 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
 
   ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI,
                  SizeInBytes, FullName, Ty->getIdentifier());
-  TypeIndex ClassTI = TypeTable.writeKnownType(CR);
+  TypeIndex ClassTI = TypeTable.writeLeafType(CR);
 
   if (const auto *File = Ty->getFile()) {
     StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File));
-    TypeIndex SIDI = TypeTable.writeKnownType(SIDR);
+    TypeIndex SIDI = TypeTable.writeLeafType(SIDR);
+
     UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
-    TypeTable.writeKnownType(USLR);
+    TypeTable.writeLeafType(USLR);
   }
 
-  addToUDTs(Ty, ClassTI);
+  addToUDTs(Ty);
 
   return ClassTI;
 }
@@ -1760,7 +1893,7 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
       ClassOptions::ForwardReference | getCommonClassOptions(Ty);
   std::string FullName = getFullyQualifiedName(Ty);
   UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier());
-  TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR);
+  TypeIndex FwdDeclTI = TypeTable.writeLeafType(UR);
   if (!Ty->isForwardDecl())
     DeferredCompleteTypes.push_back(Ty);
   return FwdDeclTI;
@@ -1782,14 +1915,15 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
 
   UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName,
                  Ty->getIdentifier());
-  TypeIndex UnionTI = TypeTable.writeKnownType(UR);
+  TypeIndex UnionTI = TypeTable.writeLeafType(UR);
 
   StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
-  TypeIndex SIRI = TypeTable.writeKnownType(SIR);
+  TypeIndex SIRI = TypeTable.writeLeafType(SIR);
+
   UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine());
-  TypeTable.writeKnownType(USLR);
+  TypeTable.writeLeafType(USLR);
 
-  addToUDTs(Ty, UnionTI);
+  addToUDTs(Ty);
 
   return UnionTI;
 }
@@ -1802,8 +1936,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
   // list record.
   unsigned MemberCount = 0;
   ClassInfo Info = collectClassInfo(Ty);
-  FieldListRecordBuilder FLBR(TypeTable);
-  FLBR.begin();
+  ContinuationRecordBuilder ContinuationBuilder;
+  ContinuationBuilder.begin(ContinuationRecordKind::FieldList);
 
   // Create base classes.
   for (const DIDerivedType *I : Info.Inheritance) {
@@ -1821,14 +1955,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
           getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
           VBTableIndex);
 
-      FLBR.writeMemberType(VBCR);
+      ContinuationBuilder.writeMemberType(VBCR);
     } else {
       assert(I->getOffsetInBits() % 8 == 0 &&
              "bases must be on byte boundaries");
       BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()),
                           getTypeIndex(I->getBaseType()),
                           I->getOffsetInBits() / 8);
-      FLBR.writeMemberType(BCR);
+      ContinuationBuilder.writeMemberType(BCR);
     }
   }
 
@@ -1842,7 +1976,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
 
     if (Member->isStaticMember()) {
       StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName);
-      FLBR.writeMemberType(SDMR);
+      ContinuationBuilder.writeMemberType(SDMR);
       MemberCount++;
       continue;
     }
@@ -1851,7 +1985,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
     if ((Member->getFlags() & DINode::FlagArtificial) &&
         Member->getName().startswith("_vptr$")) {
       VFPtrRecord VFPR(getTypeIndex(Member->getBaseType()));
-      FLBR.writeMemberType(VFPR);
+      ContinuationBuilder.writeMemberType(VFPR);
       MemberCount++;
       continue;
     }
@@ -1868,12 +2002,12 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
       StartBitOffset -= MemberOffsetInBits;
       BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(),
                          StartBitOffset);
-      MemberBaseType = TypeTable.writeKnownType(BFR);
+      MemberBaseType = TypeTable.writeLeafType(BFR);
     }
     uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
     DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes,
                          MemberName);
-    FLBR.writeMemberType(DMR);
+    ContinuationBuilder.writeMemberType(DMR);
     MemberCount++;
   }
 
@@ -1898,40 +2032,42 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
     }
     assert(!Methods.empty() && "Empty methods map entry");
     if (Methods.size() == 1)
-      FLBR.writeMemberType(Methods[0]);
+      ContinuationBuilder.writeMemberType(Methods[0]);
     else {
+      // FIXME: Make this use its own ContinuationBuilder so that
+      // MethodOverloadList can be split correctly.
       MethodOverloadListRecord MOLR(Methods);
-      TypeIndex MethodList = TypeTable.writeKnownType(MOLR);
+      TypeIndex MethodList = TypeTable.writeLeafType(MOLR);
+
       OverloadedMethodRecord OMR(Methods.size(), MethodList, Name);
-      FLBR.writeMemberType(OMR);
+      ContinuationBuilder.writeMemberType(OMR);
     }
   }
 
   // Create nested classes.
-  for (const DICompositeType *Nested : Info.NestedClasses) {
+  for (const DIType *Nested : Info.NestedTypes) {
     NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
-    FLBR.writeMemberType(R);
+    ContinuationBuilder.writeMemberType(R);
     MemberCount++;
   }
 
-  TypeIndex FieldTI = FLBR.end(true);
+  TypeIndex FieldTI = TypeTable.insertRecord(ContinuationBuilder);
   return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
-                         !Info.NestedClasses.empty());
+                         !Info.NestedTypes.empty());
 }
 
 TypeIndex CodeViewDebug::getVBPTypeIndex() {
   if (!VBPType.getIndex()) {
     // Make a 'const int *' type.
     ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
-    TypeIndex ModifiedTI = TypeTable.writeKnownType(MR);
+    TypeIndex ModifiedTI = TypeTable.writeLeafType(MR);
 
     PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
                                                   : PointerKind::Near32;
     PointerMode PM = PointerMode::Pointer;
     PointerOptions PO = PointerOptions::None;
     PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
-
-    VBPType = TypeTable.writeKnownType(PR);
+    VBPType = TypeTable.writeLeafType(PR);
   }
 
   return VBPType;
@@ -1957,6 +2093,16 @@ TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
   return recordTypeIndexForDINode(Ty, TI, ClassTy);
 }
 
+TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
+  DIType *Ty = TypeRef.resolve();
+  PointerRecord PR(getTypeIndex(Ty),
+                   getPointerSizeInBytes() == 8 ? PointerKind::Near64
+                                                : PointerKind::Near32,
+                   PointerMode::LValueReference, PointerOptions::None,
+                   Ty->getSizeInBits() / 8);
+  return TypeTable.writeLeafType(PR);
+}
+
 TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
   const DIType *Ty = TypeRef.resolve();
 
@@ -2064,7 +2210,9 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
     Flags |= LocalSymFlags::IsOptimizedOut;
 
   OS.AddComment("TypeIndex");
-  TypeIndex TI = getCompleteTypeIndex(Var.DIVar->getType());
+  TypeIndex TI = Var.UseReferenceType
+                     ? getTypeIndexForReferenceTo(Var.DIVar->getType())
+                     : getCompleteTypeIndex(Var.DIVar->getType());
   OS.EmitIntValue(TI.getIndex(), 4);
   OS.AddComment("Flags");
   OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
@@ -2125,19 +2273,21 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
 }
 
 void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
-  const Function *GV = MF->getFunction();
-  assert(FnDebugInfo.count(GV));
-  assert(CurFn == &FnDebugInfo[GV]);
+  const Function &GV = MF->getFunction();
+  assert(FnDebugInfo.count(&GV));
+  assert(CurFn == &FnDebugInfo[&GV]);
 
-  collectVariableInfo(GV->getSubprogram());
+  collectVariableInfo(GV.getSubprogram());
 
   // Don't emit anything if we don't have any line tables.
   if (!CurFn->HaveLineInfo) {
-    FnDebugInfo.erase(GV);
+    FnDebugInfo.erase(&GV);
     CurFn = nullptr;
     return;
   }
 
+  CurFn->Annotations = MF->getCodeViewAnnotations();
+
   CurFn->End = Asm->getFunctionEnd();
 
   CurFn = nullptr;
@@ -2156,6 +2306,8 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
   DebugLoc DL = MI->getDebugLoc();
   if (!DL && MI->getParent() != PrevInstBB) {
     for (const auto &NextMI : *MI->getParent()) {
+      if (NextMI.isDebugValue())
+        continue;
       DL = NextMI.getDebugLoc();
       if (DL)
         break;
@@ -2187,8 +2339,11 @@ void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
 }
 
 void CodeViewDebug::emitDebugInfoForUDTs(
-    ArrayRef<std::pair<std::string, TypeIndex>> UDTs) {
-  for (const std::pair<std::string, codeview::TypeIndex> &UDT : UDTs) {
+    ArrayRef<std::pair<std::string, const DIType *>> UDTs) {
+  for (const auto &UDT : UDTs) {
+    const DIType *T = UDT.second;
+    assert(shouldEmitUdt(T));
+
     MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
              *UDTRecordEnd = MMI->getContext().createTempSymbol();
     OS.AddComment("Record length");
@@ -2199,7 +2354,7 @@ void CodeViewDebug::emitDebugInfoForUDTs(
     OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
 
     OS.AddComment("Type");
-    OS.EmitIntValue(UDT.second.getIndex(), 4);
+    OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4);
 
     emitNullTerminatedSymbolName(OS, UDT.first);
     OS.EmitLabel(UDTRecordEnd);
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index fd8f60425c24..69e93640d7ef 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -23,8 +23,8 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
@@ -52,7 +52,7 @@ class MachineFunction;
 class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   MCStreamer &OS;
   BumpPtrAllocator Allocator;
-  codeview::TypeTableBuilder TypeTable;
+  codeview::GlobalTypeTableBuilder TypeTable;
 
   /// Represents the most general definition range.
   struct LocalVarDefRange {
@@ -94,6 +94,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   struct LocalVariable {
     const DILocalVariable *DIVar = nullptr;
     SmallVector<LocalVarDefRange, 1> DefRanges;
+    bool UseReferenceType = false;
   };
 
   struct InlineSite {
@@ -118,6 +119,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
     SmallVector<LocalVariable, 1> Locals;
 
+    std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
+
     const MCSymbol *Begin = nullptr;
     const MCSymbol *End = nullptr;
     unsigned FuncId = 0;
@@ -147,6 +150,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
 
+  void calculateRanges(LocalVariable &Var,
+                       const DbgValueHistoryMap::InstrRanges &Ranges);
+
   static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
                                         const FunctionInfo &FI,
                                         const InlineSite &Site);
@@ -155,8 +161,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   /// emit at the end of the TU.
   MapVector<const Function *, FunctionInfo> FnDebugInfo;
 
-  /// Map from DIFile to .cv_file id.
-  DenseMap<const DIFile *, unsigned> FileIdMap;
+  /// Map from full file path to .cv_file id. Full paths are built from DIFiles
+  /// and are stored in FileToFilepathMap;
+  DenseMap<StringRef, unsigned> FileIdMap;
 
   /// All inlined subprograms in the order they should be emitted.
   SmallSetVector<const DISubprogram *, 4> InlinedSubprograms;
@@ -187,8 +194,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   // The UDTs we have seen while processing types; each entry is a pair of type
   // index and type name.
-  std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs,
-      GlobalUDTs;
+  std::vector<std::pair<std::string, const DIType *>> LocalUDTs;
+  std::vector<std::pair<std::string, const DIType *>> GlobalUDTs;
 
   using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
   FileToFilepathMapTy FileToFilepathMap;
@@ -212,6 +219,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   void emitTypeInformation();
 
+  void emitTypeGlobalHashes();
+
   void emitCompilerInformation();
 
   void emitInlineeLinesSubsection();
@@ -222,8 +231,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   void emitDebugInfoForRetainedTypes();
 
-  void emitDebugInfoForUDTs(
-      ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs);
+  void
+  emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs);
 
   void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
                               const GlobalVariable *GV, MCSymbol *GVSym);
@@ -259,6 +268,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
                                    DITypeRef ClassTyRef = DITypeRef());
 
+  codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
+
   codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
                                             const DICompositeType *Class);
 
@@ -266,7 +277,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   codeview::TypeIndex getVBPTypeIndex();
 
-  void addToUDTs(const DIType *Ty, codeview::TypeIndex TI);
+  void addToUDTs(const DIType *Ty);
 
   codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
   codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
@@ -279,7 +290,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
   codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
                                               const DIType *ClassTy,
-                                              int ThisAdjustment);
+                                              int ThisAdjustment,
+                                              bool IsStaticMethod);
   codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
   codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
   codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 886e6e264b3e..b3148db30cd6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -777,6 +777,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);   break;
   case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);   break;
   case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break;
+  case dwarf::DW_FORM_data16: break;
   }
 
   for (const auto &V : values())
@@ -791,6 +792,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
   case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
   case dwarf::DW_FORM_block:  return Size + getULEB128Size(Size);
+  case dwarf::DW_FORM_data16: return 16;
   default: llvm_unreachable("Improper form for block");
   }
 }
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index c2ad9db81cfd..856758c8e4f6 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===//
+//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -9,17 +9,24 @@
 
 #include "DbgValueHistoryCalculator.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
 #include <map>
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "dwarfdebug"
@@ -72,10 +79,12 @@ unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
 }
 
 namespace {
+
 // Maps physreg numbers to the variables they describe.
-typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
-typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap;
-}
+using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
+using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;
+
+} // end anonymous namespace
 
 // \brief Claim that @Var is not described by @RegNo anymore.
 static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
@@ -83,7 +92,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
   const auto &I = RegVars.find(RegNo);
   assert(RegNo != 0U && I != RegVars.end());
   auto &VarSet = I->second;
-  const auto &VarPos = find(VarSet, Var);
+  const auto &VarPos = llvm::find(VarSet, Var);
   assert(VarPos != VarSet.end());
   VarSet.erase(VarPos);
   // Don't keep empty sets in a map to keep it as small as possible.
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 16d2d7fd7e99..a7b0562e8102 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===//
+//===- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,9 +13,11 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include <utility>
 
 namespace llvm {
 
+class DILocalVariable;
 class MachineFunction;
 class MachineInstr;
 class TargetRegisterInfo;
@@ -29,11 +31,11 @@ class DbgValueHistoryMap {
   // instruction of the next instruction range, or until the end of the
   // function.
 public:
-  typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
-  typedef SmallVector<InstrRange, 4> InstrRanges;
-  typedef std::pair<const DILocalVariable *, const DILocation *>
-      InlinedVariable;
-  typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap;
+  using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
+  using InstrRanges = SmallVector<InstrRange, 4>;
+  using InlinedVariable =
+      std::pair<const DILocalVariable *, const DILocation *>;
+  using InstrRangesMap = MapVector<InlinedVariable, InstrRanges>;
 
 private:
   InstrRangesMap VarInstrRanges;
@@ -41,6 +43,7 @@ private:
 public:
   void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
   void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
+
   // Returns register currently describing @Var. If @Var is currently
   // unaccessible or is not described by a register, returns 0.
   unsigned getRegisterForVar(InlinedVariable Var) const;
@@ -54,6 +57,7 @@ public:
 void calculateDbgValueHistory(const MachineFunction *MF,
                               const TargetRegisterInfo *TRI,
                               DbgValueHistoryMap &Result);
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 0971c5942203..d94b0e5c2118 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -13,16 +13,76 @@
 //===----------------------------------------------------------------------===//
 
 #include "DebugHandlerBase.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
+Optional<DbgVariableLocation>
+DbgVariableLocation::extractFromMachineInstruction(
+    const MachineInstr &Instruction) {
+  DbgVariableLocation Location;
+  if (!Instruction.isDebugValue())
+    return None;
+  if (!Instruction.getOperand(0).isReg())
+    return None;
+  Location.Register = Instruction.getOperand(0).getReg();
+  Location.FragmentInfo.reset();
+  // We only handle expressions generated by DIExpression::appendOffset,
+  // which doesn't require a full stack machine.
+  int64_t Offset = 0;
+  const DIExpression *DIExpr = Instruction.getDebugExpression();
+  auto Op = DIExpr->expr_op_begin();
+  while (Op != DIExpr->expr_op_end()) {
+    switch (Op->getOp()) {
+    case dwarf::DW_OP_constu: {
+      int Value = Op->getArg(0);
+      ++Op;
+      if (Op != DIExpr->expr_op_end()) {
+        switch (Op->getOp()) {
+        case dwarf::DW_OP_minus:
+          Offset -= Value;
+          break;
+        case dwarf::DW_OP_plus:
+          Offset += Value;
+          break;
+        default:
+          continue;
+        }
+      }
+    } break;
+    case dwarf::DW_OP_plus_uconst:
+      Offset += Op->getArg(0);
+      break;
+    case dwarf::DW_OP_LLVM_fragment:
+      Location.FragmentInfo = {Op->getArg(1), Op->getArg(0)};
+      break;
+    case dwarf::DW_OP_deref:
+      Location.LoadChain.push_back(Offset);
+      Offset = 0;
+      break;
+    default:
+      return None;
+    }
+    ++Op;
+  }
+
+  // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE
+  // instruction.
+  // FIXME: Replace these with DIExpression.
+  if (Instruction.isIndirectDebugValue())
+    Location.LoadChain.push_back(Offset);
+
+  return Location;
+}
+
 DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
 
 // Each LexicalScope has first instruction and last instruction to mark
@@ -119,7 +179,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI,
                          const MachineFunction *MF) {
   if (!MMI->hasDebugInfo())
     return false;
-  auto *SP = MF->getFunction()->getSubprogram();
+  auto *SP = MF->getFunction().getSubprogram();
   if (!SP)
     return false;
   assert(SP->getUnit());
@@ -163,7 +223,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
     // label, so arguments are visible when breaking at function entry.
     const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
     if (DIVar->isParameter() &&
-        getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
+        getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
       LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
       if (Ranges.front().first->getDebugExpression()->isFragment()) {
         // Mark all non-overlapping initial fragments.
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index 659a921e1fc5..245d70038de9 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -17,14 +17,38 @@
 
 #include "AsmPrinterHandler.h"
 #include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 
 namespace llvm {
 
 class AsmPrinter;
+class MachineInstr;
 class MachineModuleInfo;
 
+/// Represents the location at which a variable is stored.
+struct DbgVariableLocation {
+  /// Base register.
+  unsigned Register;
+
+  /// Chain of offsetted loads necessary to load the value if it lives in
+  /// memory. Every load except for the last is pointer-sized.
+  SmallVector<int64_t, 1> LoadChain;
+
+  /// Present if the location is part of a larger variable.
+  llvm::Optional<llvm::DIExpression::FragmentInfo> FragmentInfo;
+
+  /// Extract a VariableLocation from a MachineInstr.
+  /// This will only work if Instruction is a debug value instruction
+  /// and the associated DIExpression is in one of the supported forms.
+  /// If these requirements are not met, the returned Optional will not
+  /// have a value.
+  static Optional<DbgVariableLocation>
+  extractFromMachineInstruction(const MachineInstr &Instruction);
+};
+
 /// Base class for debug information backends. Common functionality related to
 /// tracking which variables and scopes are alive at a given PC live here.
 class DebugHandlerBase : public AsmPrinterHandler {
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index a68e8cc6b4b3..3d6d8a76529c 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -81,7 +81,7 @@ public:
       if (isLocation()) {
         llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
         if (Loc.isIndirect())
-          llvm::dbgs() << '+' << Loc.getOffset();
+          llvm::dbgs() << "+0";
         llvm::dbgs() << "} ";
       }
       else if (isConstantInt())
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 9c324ea26ac8..c21b3d3451ad 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -1,4 +1,4 @@
-//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//===- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,16 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfAccelTable.h"
-#include "DwarfCompileUnit.h"
-#include "DwarfDebug.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <vector>
 
 using namespace llvm;
 
@@ -142,13 +148,13 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
   unsigned index = 0;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     Asm->OutStreamer->AddComment("Bucket " + Twine(i));
-    if (Buckets[i].size() != 0)
+    if (!Buckets[i].empty())
       Asm->EmitInt32(index);
     else
-      Asm->EmitInt32(UINT32_MAX);
+      Asm->EmitInt32(std::numeric_limits<uint32_t>::max());
     // Buckets point in the list of hashes, not to the data. Do not
     // increment the index multiple times in case of hash collisions.
-    uint64_t PrevHash = UINT64_MAX;
+    uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
     for (auto *HD : Buckets[i]) {
       uint32_t HashValue = HD->HashValue;
       if (PrevHash != HashValue)
@@ -161,7 +167,7 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
 // Walk through the buckets and emit the individual hashes for each
 // bucket.
 void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
-  uint64_t PrevHash = UINT64_MAX;
+  uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
@@ -181,7 +187,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
 // beginning of the section. The non-section symbol will be output later
 // when we emit the actual data.
 void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
-  uint64_t PrevHash = UINT64_MAX;
+  uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
@@ -205,13 +211,14 @@ void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
 // Terminate each HashData bucket with 0.
 void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
-    uint64_t PrevHash = UINT64_MAX;
+    uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
       // Terminate the previous entry if there is no hash collision
       // with the current one.
-      if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+      if (PrevHash != std::numeric_limits<uint64_t>::max() &&
+          PrevHash != (*HI)->HashValue)
         Asm->EmitInt32(0);
       // Remember to emit the label for our offset.
       Asm->OutStreamer->EmitLabel((*HI)->Sym);
@@ -257,31 +264,30 @@ void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
 }
 
 #ifndef NDEBUG
-void DwarfAccelTable::print(raw_ostream &O) {
-
-  Header.print(O);
-  HeaderData.print(O);
+void DwarfAccelTable::print(raw_ostream &OS) {
+  Header.print(OS);
+  HeaderData.print(OS);
 
-  O << "Entries: \n";
+  OS << "Entries: \n";
   for (StringMap<DataArray>::const_iterator EI = Entries.begin(),
                                             EE = Entries.end();
        EI != EE; ++EI) {
-    O << "Name: " << EI->getKeyData() << "\n";
+    OS << "Name: " << EI->getKeyData() << "\n";
     for (HashDataContents *HD : EI->second.Values)
-      HD->print(O);
+      HD->print(OS);
   }
 
-  O << "Buckets and Hashes: \n";
+  OS << "Buckets and Hashes: \n";
   for (size_t i = 0, e = Buckets.size(); i < e; ++i)
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI)
-      (*HI)->print(O);
+      (*HI)->print(OS);
 
-  O << "Data: \n";
+  OS << "Data: \n";
   for (std::vector<HashData *>::const_iterator DI = Data.begin(),
                                                DE = Data.end();
        DI != DE; ++DI)
-    (*DI)->print(O);
+    (*DI)->print(OS);
 }
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index b1ef8cfe989d..f56199dc8e72 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -1,4 +1,4 @@
-//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//==- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,16 +15,19 @@
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
 #include <vector>
 
 // The dwarf accelerator tables are an indirect hash table optimized
@@ -65,44 +68,35 @@ class AsmPrinter;
 class DwarfDebug;
 
 class DwarfAccelTable {
-
-  static uint32_t HashDJB(StringRef Str) {
-    uint32_t h = 5381;
-    for (unsigned i = 0, e = Str.size(); i != e; ++i)
-      h = ((h << 5) + h) + Str[i];
-    return h;
-  }
-
   // Helper function to compute the number of buckets needed based on
   // the number of unique hashes.
-  void ComputeBucketCount(void);
+  void ComputeBucketCount();
 
   struct TableHeader {
-    uint32_t magic;           // 'HASH' magic value to allow endian detection
-    uint16_t version;         // Version number.
-    uint16_t hash_function;   // The hash function enumeration that was used.
-    uint32_t bucket_count;    // The number of buckets in this hash table.
-    uint32_t hashes_count;    // The total number of unique hash values
-                              // and hash data offsets in this table.
-    uint32_t header_data_len; // The bytes to skip to get to the hash
-                              // indexes (buckets) for correct alignment.
+    uint32_t magic = MagicHash; // 'HASH' magic value to allow endian detection
+    uint16_t version = 1;       // Version number.
+    uint16_t hash_function = dwarf::DW_hash_function_djb;
+    // The hash function enumeration that was used.
+    uint32_t bucket_count = 0;  // The number of buckets in this hash table.
+    uint32_t hashes_count = 0;  // The total number of unique hash values
+                                // and hash data offsets in this table.
+    uint32_t header_data_len;   // The bytes to skip to get to the hash
+                                // indexes (buckets) for correct alignment.
     // Also written to disk is the implementation specific header data.
 
     static const uint32_t MagicHash = 0x48415348;
 
-    TableHeader(uint32_t data_len)
-        : magic(MagicHash), version(1),
-          hash_function(dwarf::DW_hash_function_djb), bucket_count(0),
-          hashes_count(0), header_data_len(data_len) {}
+    TableHeader(uint32_t data_len) : header_data_len(data_len) {}
 
 #ifndef NDEBUG
-    void print(raw_ostream &O) {
-      O << "Magic: " << format("0x%x", magic) << "\n"
-        << "Version: " << version << "\n"
-        << "Hash Function: " << hash_function << "\n"
-        << "Bucket Count: " << bucket_count << "\n"
-        << "Header Data Length: " << header_data_len << "\n";
+    void print(raw_ostream &OS) {
+      OS << "Magic: " << format("0x%x", magic) << "\n"
+         << "Version: " << version << "\n"
+         << "Hash Function: " << hash_function << "\n"
+         << "Bucket Count: " << bucket_count << "\n"
+         << "Header Data Length: " << header_data_len << "\n";
     }
+
     void dump() { print(dbgs()); }
 #endif
   };
@@ -127,11 +121,13 @@ public:
     uint16_t form; // DWARF DW_FORM_ defines
 
     constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
+
 #ifndef NDEBUG
-    void print(raw_ostream &O) {
-      O << "Type: " << dwarf::AtomTypeString(type) << "\n"
-        << "Form: " << dwarf::FormEncodingString(form) << "\n";
+    void print(raw_ostream &OS) {
+      OS << "Type: " << dwarf::AtomTypeString(type) << "\n"
+         << "Form: " << dwarf::FormEncodingString(form) << "\n";
     }
+
     void dump() { print(dbgs()); }
 #endif
   };
@@ -145,11 +141,12 @@ private:
         : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
 
 #ifndef NDEBUG
-    void print(raw_ostream &O) {
-      O << "die_offset_base: " << die_offset_base << "\n";
+    void print(raw_ostream &OS) {
+      OS << "die_offset_base: " << die_offset_base << "\n";
       for (size_t i = 0; i < Atoms.size(); i++)
-        Atoms[i].print(O);
+        Atoms[i].print(OS);
     }
+
     void dump() { print(dbgs()); }
 #endif
   };
@@ -168,11 +165,12 @@ public:
     char Flags; // Specific flags to output
 
     HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
+
 #ifndef NDEBUG
-    void print(raw_ostream &O) const {
-      O << "  Offset: " << Die->getOffset() << "\n";
-      O << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n";
-      O << "  Flags: " << Flags << "\n";
+    void print(raw_ostream &OS) const {
+      OS << "  Offset: " << Die->getOffset() << "\n"
+         << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n"
+         << "  Flags: " << Flags << "\n";
     }
 #endif
   };
@@ -183,39 +181,41 @@ private:
     DwarfStringPoolEntryRef Name;
     std::vector<HashDataContents *> Values;
   };
+
   friend struct HashData;
+
   struct HashData {
     StringRef Str;
     uint32_t HashValue;
     MCSymbol *Sym;
     DwarfAccelTable::DataArray &Data; // offsets
+
     HashData(StringRef S, DwarfAccelTable::DataArray &Data)
         : Str(S), Data(Data) {
-      HashValue = DwarfAccelTable::HashDJB(S);
+      HashValue = dwarf::djbHash(S);
     }
+
 #ifndef NDEBUG
-    void print(raw_ostream &O) {
-      O << "Name: " << Str << "\n";
-      O << "  Hash Value: " << format("0x%x", HashValue) << "\n";
-      O << "  Symbol: ";
+    void print(raw_ostream &OS) {
+      OS << "Name: " << Str << "\n";
+      OS << "  Hash Value: " << format("0x%x", HashValue) << "\n";
+      OS << "  Symbol: ";
       if (Sym)
-        O << *Sym;
+        OS << *Sym;
       else
-        O << "<none>";
-      O << "\n";
+        OS << "<none>";
+      OS << "\n";
       for (HashDataContents *C : Data.Values) {
-        O << "  Offset: " << C->Die->getOffset() << "\n";
-        O << "  Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
-        O << "  Flags: " << C->Flags << "\n";
+        OS << "  Offset: " << C->Die->getOffset() << "\n";
+        OS << "  Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
+        OS << "  Flags: " << C->Flags << "\n";
       }
     }
+
     void dump() { print(dbgs()); }
 #endif
   };
 
-  DwarfAccelTable(const DwarfAccelTable &) = delete;
-  void operator=(const DwarfAccelTable &) = delete;
-
   // Internal Functions
   void EmitHeader(AsmPrinter *);
   void EmitBuckets(AsmPrinter *);
@@ -231,25 +231,31 @@ private:
   TableHeaderData HeaderData;
   std::vector<HashData *> Data;
 
-  typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
+  using StringEntries = StringMap<DataArray, BumpPtrAllocator &>;
+
   StringEntries Entries;
 
   // Buckets/Hashes/Offsets
-  typedef std::vector<HashData *> HashList;
-  typedef std::vector<HashList> BucketList;
+  using HashList = std::vector<HashData *>;
+  using BucketList = std::vector<HashList>;
   BucketList Buckets;
   HashList Hashes;
 
   // Public Implementation
 public:
   DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
+  DwarfAccelTable(const DwarfAccelTable &) = delete;
+  DwarfAccelTable &operator=(const DwarfAccelTable &) = delete;
+
   void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0);
   void FinalizeTable(AsmPrinter *, StringRef);
   void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
 #ifndef NDEBUG
-  void print(raw_ostream &O);
+  void print(raw_ostream &OS);
   void dump() { print(dbgs()); }
 #endif
 };
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index dd7f7931b06b..cbb4c48b4d88 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -12,13 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfException.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
@@ -31,11 +30,7 @@
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
@@ -92,7 +87,7 @@ static MCSymbol *getExceptionSym(AsmPrinter *Asm) {
 
 void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
 
   // If any landing pads survive, we need an EH table.
   bool hasLandingPads = !MF->getLandingPads().empty();
@@ -105,17 +100,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
   unsigned PerEncoding = TLOF.getPersonalityEncoding();
   const Function *Per = nullptr;
-  if (F->hasPersonalityFn())
-    Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+  if (F.hasPersonalityFn())
+    Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
 
   // Emit a personality function even when there are no landing pads
   forceEmitPersonality =
       // ...if a personality function is explicitly specified
-      F->hasPersonalityFn() &&
+      F.hasPersonalityFn() &&
       // ... and it's not known to be a noop in the absence of invokes
       !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
       // ... and we're not explicitly asked not to emit it
-      F->needsUnwindTableEntry();
+      F.needsUnwindTableEntry();
 
   shouldEmitPersonality =
       (forceEmitPersonality ||
@@ -148,8 +143,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
   if (!shouldEmitPersonality)
     return;
 
-  auto *F = MBB->getParent()->getFunction();
-  auto *P = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+  auto &F = MBB->getParent()->getFunction();
+  auto *P = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
   assert(P && "Expected personality function");
 
   // If we are forced to emit this personality, make sure to record
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 333d14a11af5..c8cd8eb8ffd3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units -----------===//
+//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,38 +12,58 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfCompileUnit.h"
+#include "AddressPool.h"
+#include "DwarfDebug.h"
 #include "DwarfExpression.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
 
-namespace llvm {
+using namespace llvm;
 
 DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
                                    AsmPrinter *A, DwarfDebug *DW,
                                    DwarfFile *DWU)
-    : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID),
-      Skeleton(nullptr), BaseAddress(nullptr) {
+    : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) {
   insertDIE(Node, &getUnitDie());
   MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
 }
 
 /// addLabelAddress - Add a dwarf label attribute data and value using
 /// DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
 void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
                                        const MCSymbol *Label) {
-
   // Don't use the address pool in non-fission or in the skeleton unit itself.
   // FIXME: Once GDB supports this, it's probably worthwhile using the address
   // pool from the skeleton - maybe even in non-fission (possibly fewer
@@ -336,23 +356,15 @@ void DwarfCompileUnit::constructScopeDIE(
     if (DD->isLexicalScopeDIENull(Scope))
       return;
 
-    unsigned ChildScopeCount;
+    bool HasNonScopeChildren = false;
 
     // We create children here when we know the scope DIE is not going to be
     // null and the children will be added to the scope DIE.
-    createScopeChildrenDIE(Scope, Children, &ChildScopeCount);
-
-    // Skip imported directives in gmlt-like data.
-    if (!includeMinimalInlineScopes()) {
-      // There is no need to emit empty lexical block DIE.
-      for (const auto *IE : ImportedEntities[DS])
-        Children.push_back(
-            constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
-    }
+    createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
 
     // If there are only other scopes as children, put them directly in the
     // parent instead, as this scope would serve no purpose.
-    if (Children.size() == ChildScopeCount) {
+    if (!HasNonScopeChildren) {
       FinalChildren.insert(FinalChildren.end(),
                            std::make_move_iterator(Children.begin()),
                            std::make_move_iterator(Children.end()));
@@ -488,14 +500,12 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
   if (const MachineInstr *DVInsn = DV.getMInsn()) {
     assert(DVInsn->getNumOperands() == 4);
     if (DVInsn->getOperand(0).isReg()) {
-      const MachineOperand RegOp = DVInsn->getOperand(0);
+      auto RegOp = DVInsn->getOperand(0);
+      auto Op1 = DVInsn->getOperand(1);
       // If the second operand is an immediate, this is an indirect value.
-      if (DVInsn->getOperand(1).isImm()) {
-        MachineLocation Location(RegOp.getReg(),
-                                 DVInsn->getOperand(1).getImm());
-        addVariableAddress(DV, *VariableDie, Location);
-      } else if (RegOp.getReg())
-        addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
+      assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
+      MachineLocation Location(RegOp.getReg(), Op1.isImm());
+      addVariableAddress(DV, *VariableDie, Location);
     } else if (DVInsn->getOperand(0).isImm()) {
       // This variable is described by a single constant.
       // Check whether it has a DIExpression.
@@ -557,20 +567,27 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
 
 DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
                                               SmallVectorImpl<DIE *> &Children,
-                                              unsigned *ChildScopeCount) {
+                                              bool *HasNonScopeChildren) {
+  assert(Children.empty());
   DIE *ObjectPointer = nullptr;
 
   for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
     Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
 
-  unsigned ChildCountWithoutScopes = Children.size();
+  // Skip imported directives in gmlt-like data.
+  if (!includeMinimalInlineScopes()) {
+    // There is no need to emit empty lexical block DIE.
+    for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
+      Children.push_back(
+          constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
+  }
+
+  if (HasNonScopeChildren)
+    *HasNonScopeChildren = !Children.empty();
 
   for (LexicalScope *LS : Scope->getChildren())
     constructScopeDIE(LS, Children);
 
-  if (ChildScopeCount)
-    *ChildScopeCount = Children.size() - ChildCountWithoutScopes;
-
   return ObjectPointer;
 }
 
@@ -726,7 +743,7 @@ DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
 void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
                                         LexicalScope *Scope) {
   assert(Scope && Scope->isAbstractScope());
-  auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
+  auto AbsDbgVariable = llvm::make_unique<DbgVariable>(Var, /* IA */ nullptr);
   DU->addScopeVariable(Scope, AbsDbgVariable.get());
   getAbstractVariables()[Var] = std::move(AbsDbgVariable);
 }
@@ -744,10 +761,19 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
   DwarfUnit::emitCommonHeader(UseOffsets, UT);
 }
 
+bool DwarfCompileUnit::hasDwarfPubSections() const {
+  // Opting in to GNU Pubnames/types overrides the default to ensure these are
+  // generated for things like Gold's gdb_index generation.
+  if (CUNode->getGnuPubnames())
+    return true;
+
+  return DD->tuneForGDB() && !includeMinimalInlineScopes();
+}
+
 /// addGlobalName - Add a new global name to the compile unit.
 void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
                                      const DIScope *Context) {
-  if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+  if (!hasDwarfPubSections())
     return;
   std::string FullName = getParentContextString(Context) + Name.str();
   GlobalNames[FullName] = &Die;
@@ -755,7 +781,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
 
 void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
                                                 const DIScope *Context) {
-  if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+  if (!hasDwarfPubSections())
     return;
   std::string FullName = getParentContextString(Context) + Name.str();
   // Insert, allowing the entry to remain as-is if it's already present
@@ -768,7 +794,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
 /// Add a new global type to the unit.
 void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
                                      const DIScope *Context) {
-  if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+  if (!hasDwarfPubSections())
     return;
   std::string FullName = getParentContextString(Context) + Ty->getName().str();
   GlobalTypes[FullName] = &Die;
@@ -776,7 +802,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
 
 void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
                                              const DIScope *Context) {
-  if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+  if (!hasDwarfPubSections())
     return;
   std::string FullName = getParentContextString(Context) + Ty->getName().str();
   // Insert, allowing the entry to remain as-is if it's already present
@@ -790,6 +816,12 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
 /// DbgVariable based on provided MachineLocation.
 void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
                                           MachineLocation Location) {
+  // addBlockByrefAddress is obsolete and will be removed soon.
+  // The clang frontend always generates block byref variables with a
+  // complex expression that encodes exactly what addBlockByrefAddress
+  // would do.
+  assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) &&
+         "block byref variable without a complex expression");
   if (DV.hasComplexAddress())
     addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
   else if (DV.isBlockByrefVariable())
@@ -806,12 +838,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
   if (Location.isIndirect())
     DwarfExpr.setMemoryLocationKind();
 
-  SmallVector<uint64_t, 8> Ops;
-  if (Location.isIndirect() && Location.getOffset()) {
-    Ops.push_back(dwarf::DW_OP_plus_uconst);
-    Ops.push_back(Location.getOffset());
-  }
-  DIExpressionCursor Cursor(Ops);
+  DIExpressionCursor Cursor({});
   const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
   if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
     return;
@@ -835,13 +862,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
   if (Location.isIndirect())
     DwarfExpr.setMemoryLocationKind();
 
-  SmallVector<uint64_t, 8> Ops;
-  if (Location.isIndirect() && Location.getOffset()) {
-    Ops.push_back(dwarf::DW_OP_plus_uconst);
-    Ops.push_back(Location.getOffset());
-  }
-  Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
-  DIExpressionCursor Cursor(Ops);
+  DIExpressionCursor Cursor(DIExpr);
   const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
   if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
     return;
@@ -898,4 +919,3 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
   return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
          (DD->useSplitDwarf() && !Skeleton);
 }
-} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index e38672792867..68482eb7e358 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,19 +14,32 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
 
+#include "DbgValueHistoryCalculator.h"
+#include "DwarfDebug.h"
 #include "DwarfUnit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
 
 namespace llvm {
 
-class StringRef;
 class AsmPrinter;
-class DIE;
-class DwarfDebug;
 class DwarfFile;
+class GlobalVariable;
+class MCExpr;
 class MCSymbol;
-class LexicalScope;
+class MDNode;
 
 class DwarfCompileUnit final : public DwarfUnit {
   /// A numeric ID unique among all CUs in the module
@@ -37,7 +50,7 @@ class DwarfCompileUnit final : public DwarfUnit {
   DIE::value_iterator StmtListValue;
 
   /// Skeleton unit associated with this unit.
-  DwarfCompileUnit *Skeleton;
+  DwarfCompileUnit *Skeleton = nullptr;
 
   /// The start of the unit within its section.
   MCSymbol *LabelBegin;
@@ -45,9 +58,8 @@ class DwarfCompileUnit final : public DwarfUnit {
   /// The start of the unit macro info within macro section.
   MCSymbol *MacroLabelBegin;
 
-  typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
-  typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
-  ImportedEntityMap;
+  using ImportedEntityList = SmallVector<const MDNode *, 8>;
+  using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>;
 
   ImportedEntityMap ImportedEntities;
 
@@ -66,7 +78,7 @@ class DwarfCompileUnit final : public DwarfUnit {
 
   // The base address of this unit, if any. Used for relative references in
   // ranges/locs.
-  const MCSymbol *BaseAddress;
+  const MCSymbol *BaseAddress = nullptr;
 
   DenseMap<const MDNode *, DIE *> AbstractSPDies;
   DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
@@ -164,6 +176,7 @@ public:
 
   void attachRangesOrLowHighPC(DIE &D,
                                const SmallVectorImpl<InsnRange> &Ranges);
+
   /// \brief This scope represents inlined body of a function. Construct
   /// DIE to represent this concrete inlined copy of the function.
   DIE *constructInlinedScopeDIE(LexicalScope *Scope);
@@ -181,7 +194,7 @@ public:
   /// A helper function to create children of a Scope DIE.
   DIE *createScopeChildrenDIE(LexicalScope *Scope,
                               SmallVectorImpl<DIE *> &Children,
-                              unsigned *ChildScopeCount = nullptr);
+                              bool *HasNonScopeChildren = nullptr);
 
   /// \brief Construct a DIE for this subprogram scope.
   void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
@@ -195,8 +208,9 @@ public:
 
   void finishSubprogramDefinition(const DISubprogram *SP);
   void finishVariableDefinition(const DbgVariable &Var);
+
   /// Find abstract variable associated with Var.
-  typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+  using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
   DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
                                            const DILocalVariable *&Cleansed);
   DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
@@ -275,8 +289,10 @@ public:
 
   void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
   const MCSymbol *getBaseAddress() const { return BaseAddress; }
+
+  bool hasDwarfPubSections() const;
 };
 
-} // end llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f1b4d9f20ca9..2c9c7d4f3146 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,43 +15,67 @@
 #include "ByteStreamer.h"
 #include "DIEHash.h"
 #include "DebugLocEntry.h"
+#include "DebugLocStream.h"
+#include "DwarfAccelTable.h"
 #include "DwarfCompileUnit.h"
 #include "DwarfExpression.h"
+#include "DwarfFile.h"
 #include "DwarfUnit.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/ValueHandle.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -61,10 +85,9 @@ static cl::opt<bool>
 DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
                          cl::desc("Disable debug info printing"));
 
-static cl::opt<bool>
-GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
-                       cl::desc("Generate GNU-style pubnames and pubtypes"),
-                       cl::init(false));
+static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
+    "use-dwarf-ranges-base-address-specifier", cl::Hidden,
+    cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
 
 static cl::opt<bool> GenerateARangeSection("generate-arange-section",
                                            cl::Hidden,
@@ -75,9 +98,7 @@ static cl::opt<bool> SplitDwarfCrossCuReferences(
     "split-dwarf-cross-cu-references", cl::Hidden,
     cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
 
-namespace {
 enum DefaultOnOff { Default, Enable, Disable };
-}
 
 static cl::opt<DefaultOnOff> UnknownLocations(
     "use-unknown-locations", cl::Hidden,
@@ -94,19 +115,12 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
                             clEnumVal(Disable, "Disabled")),
                  cl::init(Default));
 
-static cl::opt<DefaultOnOff>
-DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
-                 cl::desc("Generate DWARF pubnames and pubtypes sections"),
-                 cl::values(clEnumVal(Default, "Default for platform"),
-                            clEnumVal(Enable, "Enabled"),
-                            clEnumVal(Disable, "Disabled")),
-                 cl::init(Default));
-
 enum LinkageNameOption {
   DefaultLinkageNames,
   AllLinkageNames,
   AbstractLinkageNames
 };
+
 static cl::opt<LinkageNameOption>
     DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
                       cl::desc("Which DWARF linkage-name attributes to emit."),
@@ -142,8 +156,6 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
   return false;
 }
 
-//===----------------------------------------------------------------------===//
-
 bool DbgVariable::isBlockByrefVariable() const {
   assert(Var && "Invalid complex DbgVariable!");
   return Var->getType().resolve()->isBlockByrefStruct();
@@ -198,17 +210,54 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
   if (FrameIndexExprs.size() == 1)
     return FrameIndexExprs;
 
-  assert(all_of(FrameIndexExprs,
-                [](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) &&
+  assert(llvm::all_of(FrameIndexExprs,
+                      [](const FrameIndexExpr &A) {
+                        return A.Expr->isFragment();
+                      }) &&
          "multiple FI expressions without DW_OP_LLVM_fragment");
   std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
             [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
               return A.Expr->getFragmentInfo()->OffsetInBits <
                      B.Expr->getFragmentInfo()->OffsetInBits;
             });
+
   return FrameIndexExprs;
 }
 
+void DbgVariable::addMMIEntry(const DbgVariable &V) {
+  assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
+  assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+  assert(V.Var == Var && "conflicting variable");
+  assert(V.IA == IA && "conflicting inlined-at location");
+
+  assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
+  assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
+
+  // FIXME: This logic should not be necessary anymore, as we now have proper
+  // deduplication. However, without it, we currently run into the assertion
+  // below, which means that we are likely dealing with broken input, i.e. two
+  // non-fragment entries for the same variable at different frame indices.
+  if (FrameIndexExprs.size()) {
+    auto *Expr = FrameIndexExprs.back().Expr;
+    if (!Expr || !Expr->isFragment())
+      return;
+  }
+
+  for (const auto &FIE : V.FrameIndexExprs)
+    // Ignore duplicate entries.
+    if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) {
+          return FIE.FI == Other.FI && FIE.Expr == Other.Expr;
+        }))
+      FrameIndexExprs.push_back(FIE);
+
+  assert((FrameIndexExprs.size() == 1 ||
+          llvm::all_of(FrameIndexExprs,
+                       [](FrameIndexExpr &FIE) {
+                         return FIE.Expr && FIE.Expr->isFragment();
+                       })) &&
+         "conflicting locations for variable");
+}
+
 static const DwarfAccelTable::Atom TypeAtoms[] = {
     DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
     DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
@@ -225,9 +274,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
                                       dwarf::DW_FORM_data4)),
       AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                            dwarf::DW_FORM_data4)),
-      AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
-
-  CurFn = nullptr;
+      AccelTypes(TypeAtoms) {
   const Triple &TT = Asm->TM.getTargetTriple();
 
   // Make sure we know our "debugger tuning."  The target option takes
@@ -278,7 +325,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
 }
 
 // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
-DwarfDebug::~DwarfDebug() { }
+DwarfDebug::~DwarfDebug() = default;
 
 static bool isObjCClass(StringRef Name) {
   return Name.startswith("+") || Name.startswith("-");
@@ -389,20 +436,8 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
   }
 }
 
-bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const {
-  // Opting in to GNU Pubnames/types overrides the default to ensure these are
-  // generated for things like Gold's gdb_index generation.
-  if (GenerateGnuPubSections)
-    return true;
-
-  if (DwarfPubSections == Default)
-    return tuneForGDB() && !includeMinimalInlineScopes;
-
-  return DwarfPubSections == Enable;
-}
-
 void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
-  if (!hasDwarfPubSections(U.includeMinimalInlineScopes()))
+  if (!U.hasDwarfPubSections())
     return;
 
   U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
@@ -417,7 +452,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
   StringRef FN = DIUnit->getFilename();
   CompilationDir = DIUnit->getDirectory();
 
-  auto OwnedUnit = make_unique<DwarfCompileUnit>(
+  auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
       InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
   DwarfCompileUnit &NewCU = *OwnedUnit;
   DIE &Die = NewCU.getUnitDie();
@@ -428,6 +463,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
                   Asm->TM.Options.MCOptions.SplitDwarfFile);
   }
 
+  for (auto *IE : DIUnit->getImportedEntities())
+    NewCU.addImportedEntity(IE);
+
   // LTO with assembly output shares a single line table amongst multiple CUs.
   // To avoid the compilation directory being ambiguous, let the line table
   // explicitly describe the directory of all files, never relying on the
@@ -494,6 +532,8 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
 
 void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
                                                   const DIImportedEntity *N) {
+  if (isa<DILocalScope>(N->getScope()))
+    return;
   if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
     D->addChild(TheCU.constructImportedEntityDIE(N));
 }
@@ -503,13 +543,18 @@ static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
 sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
   std::sort(GVEs.begin(), GVEs.end(),
             [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
-              if (A.Expr != B.Expr && A.Expr && B.Expr) {
-		auto FragmentA = A.Expr->getFragmentInfo();
-		auto FragmentB = B.Expr->getFragmentInfo();
-		if (FragmentA && FragmentB)
-		  return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
-	      }
-              return false;
+              // Sort order: first null exprs, then exprs without fragment
+              // info, then sort by fragment offset in bits.
+              // FIXME: Come up with a more comprehensive comparator so
+              // the sorting isn't non-deterministic, and so the following
+              // std::unique call works correctly.
+              if (!A.Expr || !B.Expr)
+                return !!B.Expr;
+              auto FragmentA = A.Expr->getFragmentInfo();
+              auto FragmentB = B.Expr->getFragmentInfo();
+              if (!FragmentA || !FragmentB)
+                return !!FragmentB;
+              return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
             });
   GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
                          [](DwarfCompileUnit::GlobalExpr A,
@@ -546,18 +591,31 @@ void DwarfDebug::beginModule() {
   }
 
   for (DICompileUnit *CUNode : M->debug_compile_units()) {
-    if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
-        CUNode->getGlobalVariables().empty() &&
-        CUNode->getImportedEntities().empty() && CUNode->getMacros().empty())
+    // FIXME: Move local imported entities into a list attached to the
+    // subprogram, then this search won't be needed and a
+    // getImportedEntities().empty() test should go below with the rest.
+    bool HasNonLocalImportedEntities = llvm::any_of(
+        CUNode->getImportedEntities(), [](const DIImportedEntity *IE) {
+          return !isa<DILocalScope>(IE->getScope());
+        });
+
+    if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() &&
+        CUNode->getRetainedTypes().empty() &&
+        CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
       continue;
 
     DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
-    for (auto *IE : CUNode->getImportedEntities())
-      CU.addImportedEntity(IE);
 
     // Global Variables.
-    for (auto *GVE : CUNode->getGlobalVariables())
-      GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()});
+    for (auto *GVE : CUNode->getGlobalVariables()) {
+      // Don't bother adding DIGlobalVariableExpressions listed in the CU if we
+      // already know about the variable and it isn't adding a constant
+      // expression.
+      auto &GVMapEntry = GVMap[GVE->getVariable()];
+      auto *Expr = GVE->getExpression();
+      if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
+        GVMapEntry.push_back({nullptr, Expr});
+    }
     DenseSet<DIGlobalVariable *> Processed;
     for (auto *GVE : CUNode->getGlobalVariables()) {
       DIGlobalVariable *GV = GVE->getVariable();
@@ -682,6 +740,11 @@ void DwarfDebug::finalizeModuleInfo() {
                         TLOF.getDwarfMacinfoSection()->getBeginSymbol());
   }
 
+  // Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
+  for (auto *CUNode : MMI->getModule()->debug_compile_units())
+    if (CUNode->getDWOId())
+      getOrCreateDwarfCompileUnit(CUNode);
+
   // Compute DIE offsets and sizes.
   InfoHolder.computeSizeAndOffsets();
   if (useSplitDwarf())
@@ -744,12 +807,7 @@ void DwarfDebug::endModule() {
   }
 
   // Emit the pubnames and pubtypes sections if requested.
-  // The condition is optimistically correct - any CU not using GMLT (&
-  // implicit/default pubnames state) might still have pubnames.
-  if (hasDwarfPubSections(/* gmlt */ false)) {
-    emitDebugPubNames(GenerateGnuPubSections);
-    emitDebugPubTypes(GenerateGnuPubSections);
-  }
+  emitDebugPubSections();
 
   // clean up.
   // FIXME: AbstractVariables.clear();
@@ -775,9 +833,11 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
           LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
     CU.createAbstractVariable(Cleansed, Scope);
 }
+
 // Collect variable information from side table maintained by MF.
 void DwarfDebug::collectVariableInfoFromMFTable(
     DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
+  SmallDenseMap<InlinedVariable, DbgVariable *> MFVars;
   for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
     if (!VI.Var)
       continue;
@@ -793,26 +853,28 @@ void DwarfDebug::collectVariableInfoFromMFTable(
       continue;
 
     ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
-    auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
+    auto RegVar = llvm::make_unique<DbgVariable>(Var.first, Var.second);
     RegVar->initializeMMI(VI.Expr, VI.Slot);
-    if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
+    if (DbgVariable *DbgVar = MFVars.lookup(Var))
+      DbgVar->addMMIEntry(*RegVar);
+    else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
+      MFVars.insert({Var, RegVar.get()});
       ConcreteVariables.push_back(std::move(RegVar));
+    }
   }
 }
 
 // Get .debug_loc entry for the instruction range starting at MI.
 static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
   const DIExpression *Expr = MI->getDebugExpression();
-
   assert(MI->getNumOperands() == 4);
   if (MI->getOperand(0).isReg()) {
-    MachineLocation MLoc;
+    auto RegOp = MI->getOperand(0);
+    auto Op1 = MI->getOperand(1);
     // If the second operand is an immediate, this is a
     // register-indirect address.
-    if (!MI->getOperand(1).isImm())
-      MLoc.set(MI->getOperand(0).getReg());
-    else
-      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
+    MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
     return DebugLocEntry::Value(Expr, MLoc);
   }
   if (MI->getOperand(0).isImm())
@@ -967,7 +1029,8 @@ DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
                                                 LexicalScope &Scope,
                                                 InlinedVariable IV) {
   ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
-  ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
+  ConcreteVariables.push_back(
+      llvm::make_unique<DbgVariable>(IV.first, IV.second));
   InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
   return ConcreteVariables.back().get();
 }
@@ -1100,7 +1163,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   DebugHandlerBase::beginInstruction(MI);
   assert(CurMI);
 
-  const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram();
+  const auto *SP = MI->getMF()->getFunction().getSubprogram();
   if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
     return;
 
@@ -1198,7 +1261,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
 void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
   CurFn = MF;
 
-  auto *SP = MF->getFunction()->getSubprogram();
+  auto *SP = MF->getFunction().getSubprogram();
   assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
   if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
     return;
@@ -1234,7 +1297,7 @@ void DwarfDebug::skippedNonDebugFunction() {
 
 // Gather and emit post-function debug information.
 void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
-  const DISubprogram *SP = MF->getFunction()->getSubprogram();
+  const DISubprogram *SP = MF->getFunction().getSubprogram();
 
   assert(CurFn == MF &&
       "endFunction should be called with the same function as beginFunction");
@@ -1309,8 +1372,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
   if (auto *Scope = cast_or_null<DIScope>(S)) {
     Fn = Scope->getFilename();
     Dir = Scope->getDirectory();
-    if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
-      if (getDwarfVersion() >= 4)
+    if (Line != 0 && getDwarfVersion() >= 4)
+      if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
         Discriminator = LBF->getDiscriminator();
 
     unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
@@ -1440,84 +1503,74 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
   }
 }
 
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
-  MCSection *PSec = GnuStyle
-                        ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
-                        : Asm->getObjFileLowering().getDwarfPubNamesSection();
-
-  emitDebugPubSection(GnuStyle, PSec, "Names",
-                      &DwarfCompileUnit::getGlobalNames);
-}
-
-void DwarfDebug::emitDebugPubSection(
-    bool GnuStyle, MCSection *PSec, StringRef Name,
-    const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
+/// emitDebugPubSections - Emit visible names and types into debug pubnames and
+/// pubtypes sections.
+void DwarfDebug::emitDebugPubSections() {
   for (const auto &NU : CUMap) {
     DwarfCompileUnit *TheU = NU.second;
-
-    const auto &Globals = (TheU->*Accessor)();
-
-    if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes()))
+    if (!TheU->hasDwarfPubSections())
       continue;
 
-    if (auto *Skeleton = TheU->getSkeleton())
-      TheU = Skeleton;
+    bool GnuStyle = TheU->getCUNode()->getGnuPubnames();
 
-    // Start the dwarf pubnames section.
-    Asm->OutStreamer->SwitchSection(PSec);
+    Asm->OutStreamer->SwitchSection(
+        GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+                 : Asm->getObjFileLowering().getDwarfPubNamesSection());
+    emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
 
-    // Emit the header.
-    Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
-    MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
-    MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
-    Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+    Asm->OutStreamer->SwitchSection(
+        GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
+                 : Asm->getObjFileLowering().getDwarfPubTypesSection());
+    emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
+  }
+}
 
-    Asm->OutStreamer->EmitLabel(BeginLabel);
+void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
+                                     DwarfCompileUnit *TheU,
+                                     const StringMap<const DIE *> &Globals) {
+  if (auto *Skeleton = TheU->getSkeleton())
+    TheU = Skeleton;
 
-    Asm->OutStreamer->AddComment("DWARF Version");
-    Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
+  // Emit the header.
+  Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
+  MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
+  MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
+  Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
 
-    Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
-    Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
+  Asm->OutStreamer->EmitLabel(BeginLabel);
 
-    Asm->OutStreamer->AddComment("Compilation Unit Length");
-    Asm->EmitInt32(TheU->getLength());
+  Asm->OutStreamer->AddComment("DWARF Version");
+  Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
 
-    // Emit the pubnames for this compilation unit.
-    for (const auto &GI : Globals) {
-      const char *Name = GI.getKeyData();
-      const DIE *Entity = GI.second;
+  Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
+  Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
 
-      Asm->OutStreamer->AddComment("DIE offset");
-      Asm->EmitInt32(Entity->getOffset());
+  Asm->OutStreamer->AddComment("Compilation Unit Length");
+  Asm->EmitInt32(TheU->getLength());
 
-      if (GnuStyle) {
-        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
-        Asm->OutStreamer->AddComment(
-            Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
-            dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
-        Asm->EmitInt8(Desc.toBits());
-      }
+  // Emit the pubnames for this compilation unit.
+  for (const auto &GI : Globals) {
+    const char *Name = GI.getKeyData();
+    const DIE *Entity = GI.second;
+
+    Asm->OutStreamer->AddComment("DIE offset");
+    Asm->EmitInt32(Entity->getOffset());
 
-      Asm->OutStreamer->AddComment("External Name");
-      Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
+    if (GnuStyle) {
+      dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
+      Asm->OutStreamer->AddComment(
+          Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
+          dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+      Asm->EmitInt8(Desc.toBits());
     }
 
-    Asm->OutStreamer->AddComment("End Mark");
-    Asm->EmitInt32(0);
-    Asm->OutStreamer->EmitLabel(EndLabel);
+    Asm->OutStreamer->AddComment("External Name");
+    Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
   }
-}
-
-void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
-  MCSection *PSec = GnuStyle
-                        ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
-                        : Asm->getObjFileLowering().getDwarfPubTypesSection();
 
-  emitDebugPubSection(GnuStyle, PSec, "Types",
-                      &DwarfCompileUnit::getGlobalTypes);
+  Asm->OutStreamer->AddComment("End Mark");
+  Asm->EmitInt32(0);
+  Asm->OutStreamer->EmitLabel(EndLabel);
 }
 
 /// Emit null-terminated strings into a debug str section.
@@ -1553,13 +1606,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
     MachineLocation Location = Value.getLoc();
     if (Location.isIndirect())
       DwarfExpr.setMemoryLocationKind();
-    SmallVector<uint64_t, 8> Ops;
-    if (Location.isIndirect() && Location.getOffset()) {
-      Ops.push_back(dwarf::DW_OP_plus_uconst);
-      Ops.push_back(Location.getOffset());
-    }
-    Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
-    DIExpressionCursor Cursor(Ops);
+    DIExpressionCursor Cursor(DIExpr);
     const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
     if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
       return;
@@ -1580,7 +1627,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
   const DebugLocEntry::Value &Value = Values[0];
   if (Value.isFragment()) {
     // Emit all fragments that belong to the same variable and range.
-    assert(all_of(Values, [](DebugLocEntry::Value P) {
+    assert(llvm::all_of(Values, [](DebugLocEntry::Value P) {
           return P.isFragment();
         }) && "all values are expected to be fragments");
     assert(std::is_sorted(Values.begin(), Values.end()) &&
@@ -1844,17 +1891,49 @@ void DwarfDebug::emitDebugRanges() {
       // Emit our symbol so we can find the beginning of the range.
       Asm->OutStreamer->EmitLabel(List.getSym());
 
+      // Gather all the ranges that apply to the same section so they can share
+      // a base address entry.
+      MapVector<const MCSection *, std::vector<const RangeSpan *>> MV;
       for (const RangeSpan &Range : List.getRanges()) {
-        const MCSymbol *Begin = Range.getStart();
-        const MCSymbol *End = Range.getEnd();
-        assert(Begin && "Range without a begin symbol?");
-        assert(End && "Range without an end symbol?");
-        if (auto *Base = TheCU->getBaseAddress()) {
-          Asm->EmitLabelDifference(Begin, Base, Size);
-          Asm->EmitLabelDifference(End, Base, Size);
-        } else {
-          Asm->OutStreamer->EmitSymbolValue(Begin, Size);
-          Asm->OutStreamer->EmitSymbolValue(End, Size);
+        MV[&Range.getStart()->getSection()].push_back(&Range);
+      }
+
+      auto *CUBase = TheCU->getBaseAddress();
+      bool BaseIsSet = false;
+      for (const auto &P : MV) {
+        // Don't bother with a base address entry if there's only one range in
+        // this section in this range list - for example ranges for a CU will
+        // usually consist of single regions from each of many sections
+        // (-ffunction-sections, or just C++ inline functions) except under LTO
+        // or optnone where there may be holes in a single CU's section
+        // contrubutions.
+        auto *Base = CUBase;
+        if (!Base && P.second.size() > 1 &&
+            UseDwarfRangesBaseAddressSpecifier) {
+          BaseIsSet = true;
+          // FIXME/use care: This may not be a useful base address if it's not
+          // the lowest address/range in this object.
+          Base = P.second.front()->getStart();
+          Asm->OutStreamer->EmitIntValue(-1, Size);
+          Asm->OutStreamer->EmitSymbolValue(Base, Size);
+        } else if (BaseIsSet) {
+          BaseIsSet = false;
+          Asm->OutStreamer->EmitIntValue(-1, Size);
+          Asm->OutStreamer->EmitIntValue(0, Size);
+        }
+
+        for (const auto *RS : P.second) {
+          const MCSymbol *Begin = RS->getStart();
+          const MCSymbol *End = RS->getEnd();
+          assert(Begin && "Range without a begin symbol?");
+          assert(End && "Range without an end symbol?");
+          if (Base) {
+            Asm->EmitLabelDifference(Begin, Base, Size);
+            Asm->EmitLabelDifference(End, Base, Size);
+          } else {
+            Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+            Asm->OutStreamer->EmitSymbolValue(End, Size);
+          }
         }
       }
 
@@ -1943,7 +2022,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
 // DW_AT_addr_base, DW_AT_ranges_base.
 DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
 
-  auto OwnedUnit = make_unique<DwarfCompileUnit>(
+  auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
       CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
   DwarfCompileUnit &NewCU = *OwnedUnit;
   NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
@@ -2024,8 +2103,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
   bool TopLevelType = TypeUnitsUnderConstruction.empty();
   AddrPool.resetUsedFlag();
 
-  auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
-                                              getDwoLineTable(CU));
+  auto OwnedUnit = llvm::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+                                                    getDwoLineTable(CU));
   DwarfTypeUnit &NewTU = *OwnedUnit;
   DIE &UnitDie = NewTU.getUnitDie();
   TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 78ee9a162029..2ae0b418a91e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,40 +14,52 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
 
+#include "AddressPool.h"
 #include "DbgValueHistoryCalculator.h"
 #include "DebugHandlerBase.h"
 #include "DebugLocStream.h"
 #include "DwarfAccelTable.h"
 #include "DwarfFile.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/DIE.h"
-#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
 #include <memory>
+#include <utility>
+#include <vector>
 
 namespace llvm {
 
 class AsmPrinter;
 class ByteStreamer;
-class ConstantInt;
-class ConstantFP;
 class DebugLocEntry;
+class DIE;
 class DwarfCompileUnit;
-class DwarfDebug;
 class DwarfTypeUnit;
 class DwarfUnit;
-class MachineModuleInfo;
+class LexicalScope;
+class MachineFunction;
+class MCSection;
+class MCSymbol;
+class MDNode;
+class Module;
 
 //===----------------------------------------------------------------------===//
 /// This class is used to track local variable information.
@@ -89,7 +101,7 @@ public:
     assert(!MInsn && "Already initialized?");
 
     assert((!E || E->isValid()) && "Expected valid expression");
-    assert(FI != INT_MAX && "Expected valid index");
+    assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
 
     FrameIndexExprs.push_back({FI, E});
   }
@@ -111,10 +123,12 @@ public:
   // Accessors.
   const DILocalVariable *getVariable() const { return Var; }
   const DILocation *getInlinedAt() const { return IA; }
+
   const DIExpression *getSingleExpression() const {
     assert(MInsn && FrameIndexExprs.size() <= 1);
     return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
   }
+
   void setDIE(DIE &D) { TheDIE = &D; }
   DIE *getDIE() const { return TheDIE; }
   void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
@@ -124,30 +138,7 @@ public:
   /// Get the FI entries, sorted by fragment offset.
   ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
   bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
-
-  void addMMIEntry(const DbgVariable &V) {
-    assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
-    assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
-    assert(V.Var == Var && "conflicting variable");
-    assert(V.IA == IA && "conflicting inlined-at location");
-
-    assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
-    assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
-
-    if (FrameIndexExprs.size()) {
-      auto *Expr = FrameIndexExprs.back().Expr;
-      // Get rid of duplicate non-fragment entries. More than one non-fragment
-      // dbg.declare makes no sense so ignore all but the first.
-      if (!Expr || !Expr->isFragment())
-        return;
-    }
-    FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end());
-    assert(all_of(FrameIndexExprs,
-                  [](FrameIndexExpr &FIE) {
-                    return FIE.Expr && FIE.Expr->isFragment();
-                  }) &&
-           "conflicting locations for variable");
-  }
+  void addMMIEntry(const DbgVariable &V);
 
   // Translate tag to proper Dwarf tag.
   dwarf::Tag getTag() const {
@@ -157,6 +148,7 @@ public:
 
     return dwarf::DW_TAG_variable;
   }
+
   /// Return true if DbgVariable is artificial.
   bool isArtificial() const {
     if (Var->isArtificial())
@@ -182,6 +174,7 @@ public:
            "Invalid Expr for DBG_VALUE");
     return !FrameIndexExprs.empty();
   }
+
   bool isBlockByrefVariable() const;
   const DIType *getType() const;
 
@@ -191,10 +184,10 @@ private:
   }
 };
 
-
 /// Helper used to pair up a symbol and its DWARF compile unit.
 struct SymbolCU {
   SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+
   const MCSymbol *Sym;
   DwarfCompileUnit *CU;
 };
@@ -230,7 +223,7 @@ class DwarfDebug : public DebugHandlerBase {
       ProcessedSPNodes;
 
   /// If nonnull, stores the current machine function we're processing.
-  const MachineFunction *CurFn;
+  const MachineFunction *CurFn = nullptr;
 
   /// If nonnull, stores the CU in which the previous subprogram was contained.
   const DwarfCompileUnit *PrevCU;
@@ -296,17 +289,7 @@ class DwarfDebug : public DebugHandlerBase {
   DwarfAccelTable AccelTypes;
 
   // Identify a debugger for "tuning" the debug info.
-  DebuggerKind DebuggerTuning;
-
-  /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
-  ///
-  /// Returns whether we are "tuning" for a given debugger.
-  /// Should be used only within the constructor, to set feature flags.
-  /// @{
-  bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
-  bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
-  bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
-  /// @}
+  DebuggerKind DebuggerTuning = DebuggerKind::Default;
 
   MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
 
@@ -314,7 +297,7 @@ class DwarfDebug : public DebugHandlerBase {
     return InfoHolder.getUnits();
   }
 
-  typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+  using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
 
   void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var,
                                        const MDNode *Scope);
@@ -358,21 +341,12 @@ class DwarfDebug : public DebugHandlerBase {
   /// Emit type dies into a hashed accelerator table.
   void emitAccelTypes();
 
-  /// Emit visible names into a debug pubnames section.
-  /// \param GnuStyle determines whether or not we want to emit
-  /// additional information into the table ala newer gcc for gdb
-  /// index.
-  void emitDebugPubNames(bool GnuStyle = false);
+  /// Emit visible names and types into debug pubnames and pubtypes sections.
+  void emitDebugPubSections();
 
-  /// Emit visible types into a debug pubtypes section.
-  /// \param GnuStyle determines whether or not we want to emit
-  /// additional information into the table ala newer gcc for gdb
-  /// index.
-  void emitDebugPubTypes(bool GnuStyle = false);
-
-  void emitDebugPubSection(
-      bool GnuStyle, MCSection *PSec, StringRef Name,
-      const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
+  void emitDebugPubSection(bool GnuStyle, StringRef Name,
+                           DwarfCompileUnit *TheU,
+                           const StringMap<const DIE *> &Globals);
 
   /// Emit null-terminated strings into a debug str section.
   void emitDebugStr();
@@ -561,11 +535,19 @@ public:
   /// going to be null.
   bool isLexicalScopeDIENull(LexicalScope *Scope);
 
-  bool hasDwarfPubSections(bool includeMinimalInlineScopes) const;
-
   /// Find the matching DwarfCompileUnit for the given CU DIE.
   DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); }
+
+  /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+  ///
+  /// Returns whether we are "tuning" for a given debugger.
+  /// @{
+  bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+  bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+  bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+  /// @}
 };
-} // End of namespace llvm
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 3a8568cf39ae..68d25fe37b43 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===//
+//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,13 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfExpression.h"
-#include "DwarfDebug.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
 
 using namespace llvm;
 
@@ -128,6 +130,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
     unsigned Size = TRI.getSubRegIdxSize(Idx);
     unsigned Offset = TRI.getSubRegIdxOffset(Idx);
     Reg = TRI.getDwarfRegNum(*SR, false);
+    if (Reg < 0)
+      continue;
 
     // Intersection between the bits we already emitted and the bits
     // covered by this subregister.
@@ -136,14 +140,14 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
 
     // If this sub-register has a DWARF number and we haven't covered
     // its range, emit a DWARF piece for it.
-    if (Reg >= 0 && CurSubReg.test(Coverage)) {
+    if (CurSubReg.test(Coverage)) {
       // Emit a piece for any gap in the coverage.
       if (Offset > CurPos)
         DwarfRegs.push_back({-1, Offset - CurPos, nullptr});
       DwarfRegs.push_back(
           {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
       if (Offset >= MaxSize)
-	break;
+        break;
 
       // Mark it as emitted.
       Coverage.set(Offset, Offset + Size);
@@ -336,9 +340,10 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       break;
     case dwarf::DW_OP_plus:
     case dwarf::DW_OP_minus:
+    case dwarf::DW_OP_mul:
       emitOp(Op->getOp());
       break;
-    case dwarf::DW_OP_deref: {
+    case dwarf::DW_OP_deref:
       assert(LocationKind != Register);
       if (LocationKind != Memory && isMemoryLocation(ExprCursor))
         // Turning this into a memory location description makes the deref
@@ -347,7 +352,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       else
         emitOp(dwarf::DW_OP_deref);
       break;
-    }
     case dwarf::DW_OP_constu:
       assert(LocationKind != Register);
       emitOp(dwarf::DW_OP_constu);
@@ -383,7 +387,6 @@ void DwarfExpression::maskSubRegister() {
   addAnd(Mask);
 }
 
-
 void DwarfExpression::finalize() {
   assert(DwarfRegs.size() == 0 && "dwarf registers not emitted");
   // Emit any outstanding DW_OP_piece operations to mask out subregisters.
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 728f8ad9225b..ea5cbc40ba35 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ---*- C++ -*--===//
+//===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,21 +14,29 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
 
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 namespace llvm {
 
 class AsmPrinter;
+class APInt;
 class ByteStreamer;
-class TargetRegisterInfo;
 class DwarfUnit;
 class DIELoc;
+class TargetRegisterInfo;
 
 /// Holds a DIExpression and keeps track of how many operands have been consumed
 /// so far.
 class DIExpressionCursor {
   DIExpression::expr_op_iterator Start, End;
+
 public:
   DIExpressionCursor(const DIExpression *Expr) {
     if (!Expr) {
@@ -42,8 +50,7 @@ public:
   DIExpressionCursor(ArrayRef<uint64_t> Expr)
       : Start(Expr.begin()), End(Expr.end()) {}
 
-  DIExpressionCursor(const DIExpressionCursor &C)
-      : Start(C.Start), End(C.End) {}
+  DIExpressionCursor(const DIExpressionCursor &) = default;
 
   /// Consume one operation.
   Optional<DIExpression::ExprOperand> take() {
@@ -73,8 +80,10 @@ public:
 
     return *Next;
   }
+
   /// Determine whether there are any operations left in this expression.
   operator bool() const { return Start != End; }
+
   DIExpression::expr_op_iterator begin() const { return Start; }
   DIExpression::expr_op_iterator end() const { return End; }
 
@@ -122,10 +131,13 @@ protected:
 
   /// Output a dwarf operand and an optional assembler comment.
   virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+
   /// Emit a raw signed value.
   virtual void emitSigned(int64_t Value) = 0;
+
   /// Emit a raw unsigned value.
   virtual void emitUnsigned(uint64_t Value) = 0;
+
   /// Return whether the given machine register is the frame register in the
   /// current function.
   virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
@@ -133,8 +145,10 @@ protected:
   /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
   /// register location description.
   void addReg(int DwarfReg, const char *Comment = nullptr);
+
   /// Emit a DW_OP_breg operation.
   void addBReg(int DwarfReg, int Offset);
+
   /// Emit DW_OP_fbreg <Offset>.
   void addFBReg(int Offset);
 
@@ -156,7 +170,6 @@ protected:
   bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
                      unsigned MaxSize = ~1U);
 
-
   /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
   /// \param OffsetInBits    This is an optional offset into the location that
   /// is at the top of the DWARF stack.
@@ -164,6 +177,7 @@ protected:
 
   /// Emit a shift-right dwarf operation.
   void addShr(unsigned ShiftBy);
+
   /// Emit a bitwise and dwarf operation.
   void addAnd(unsigned Mask);
 
@@ -181,6 +195,7 @@ protected:
   void addStackValue();
 
   ~DwarfExpression() = default;
+
 public:
   DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
 
@@ -189,8 +204,10 @@ public:
 
   /// Emit a signed constant.
   void addSignedConstant(int64_t Value);
+
   /// Emit an unsigned constant.
   void addUnsignedConstant(uint64_t Value);
+
   /// Emit an unsigned constant.
   void addUnsignedConstant(const APInt &Value);
 
@@ -213,6 +230,7 @@ public:
   bool addMachineRegExpression(const TargetRegisterInfo &TRI,
                                DIExpressionCursor &Expr, unsigned MachineReg,
                                unsigned FragmentOffsetInBits = 0);
+
   /// Emit all remaining operations in the DIExpressionCursor.
   ///
   /// \param FragmentOffsetInBits     If this is one fragment out of multiple
@@ -235,6 +253,7 @@ class DebugLocDwarfExpression final : public DwarfExpression {
   void emitUnsigned(uint64_t Value) override;
   bool isFrameRegister(const TargetRegisterInfo &TRI,
                        unsigned MachineReg) override;
+
 public:
   DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
       : DwarfExpression(DwarfVersion), BS(BS) {}
@@ -253,11 +272,13 @@ const AsmPrinter &AP;
                        unsigned MachineReg) override;
 public:
   DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
+
   DIELoc *finalize() {
     DwarfExpression::finalize();
     return &DIE;
   }
 };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 595f1d91c4bf..3c04c969192d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===//
+//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,13 +11,16 @@
 #include "DwarfCompileUnit.h"
 #include "DwarfDebug.h"
 #include "DwarfUnit.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cstdint>
+
+using namespace llvm;
 
-namespace llvm {
 DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
     : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {}
 
@@ -113,4 +116,3 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
   Vars.push_back(Var);
   return true;
 }
-}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 54924e9806ed..167ca13c19c1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===//
+//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,30 +10,25 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
 
-#include "AddressPool.h"
 #include "DwarfStringPool.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/Allocator.h"
 #include <memory>
+#include <utility>
 
 namespace llvm {
+
 class AsmPrinter;
 class DbgVariable;
 class DwarfCompileUnit;
 class DwarfUnit;
-class DIEAbbrev;
-class MCSymbol;
-class DIE;
 class LexicalScope;
-class StringRef;
-class DwarfDebug;
 class MCSection;
-class MDNode;
+
 class DwarfFile {
   // Target of Dwarf emission, used for sizing of abbreviations.
   AsmPrinter *Asm;
@@ -106,6 +101,7 @@ public:
   DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
     return AbstractSPDies;
   }
+
   DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
     return AbstractVariables;
   }
@@ -113,9 +109,12 @@ public:
   void insertDIE(const MDNode *TypeMD, DIE *Die) {
     DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
   }
+
   DIE *getDIE(const MDNode *TypeMD) {
     return DITypeNodeToDieMap.lookup(TypeMD);
   }
 };
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 2066f745e318..aa5f01e88933 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===//
+//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,9 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfStringPool.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
+#include <cassert>
+#include <utility>
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 93a168485a54..1cac3b7c8432 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===//
+//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,29 +11,28 @@
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
 
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/Support/Allocator.h"
-#include <utility>
 
 namespace llvm {
 
 class AsmPrinter;
-class MCSymbol;
 class MCSection;
-class StringRef;
 
 // Collection of strings for this unit and assorted symbols.
 // A String->Symbol mapping of strings used by indirect
 // references.
 class DwarfStringPool {
-  typedef DwarfStringPoolEntry EntryTy;
+  using EntryTy = DwarfStringPoolEntry;
+
   StringMap<EntryTy, BumpPtrAllocator &> Pool;
   StringRef Prefix;
   unsigned NumBytes = 0;
   bool ShouldCreateSymbols;
 
 public:
-  typedef DwarfStringPoolEntryRef EntryRef;
+  using EntryRef = DwarfStringPoolEntryRef;
 
   DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
 
@@ -45,5 +44,7 @@ public:
   /// Get a reference to an entry in the string pool.
   EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
 };
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 4f4ebfc56297..911e46235781 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -22,6 +22,9 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalValue.h"
@@ -33,9 +36,6 @@
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 #include <cstdint>
 #include <string>
@@ -473,11 +473,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
   if (Location.isIndirect())
     DwarfExpr.setMemoryLocationKind();
 
-  SmallVector<uint64_t, 9> Ops;
-  if (Location.isIndirect() && Location.getOffset()) {
-    Ops.push_back(dwarf::DW_OP_plus_uconst);
-    Ops.push_back(Location.getOffset());
-  }
+  SmallVector<uint64_t, 6> Ops;
   // If we started with a pointer to the __Block_byref... struct, then
   // the first thing we need to do is dereference the pointer (DW_OP_deref).
   if (isPointer)
@@ -964,8 +960,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
 
     // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
     // inside C++ composite types to point to the base class with the vtable.
-    if (auto *ContainingType =
-            dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder())))
+    // Rust uses DW_AT_containing_type to link a vtable to the type
+    // for which it was created.
+    if (auto *ContainingType = resolve(CTy->getVTableHolder()))
       addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
                   *getOrCreateTypeDIE(ContainingType));
 
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e14d5be1177a..3cdab57bca70 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
+//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,22 +12,34 @@
 //===----------------------------------------------------------------------===//
 
 #include "EHStreamer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <vector>
 
 using namespace llvm;
 
 EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
 
-EHStreamer::~EHStreamer() {}
+EHStreamer::~EHStreamer() = default;
 
 /// How many leading type ids two landing pads have in common.
 unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
@@ -50,7 +62,6 @@ unsigned EHStreamer::
 computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
                     SmallVectorImpl<ActionEntry> &Actions,
                     SmallVectorImpl<unsigned> &FirstActions) {
-
   // The action table follows the call-site table in the LSDA. The individual
   // records are of two types:
   //
@@ -478,13 +489,14 @@ void EHStreamer::emitExceptionTable() {
     sizeof(int8_t) +                            // TType format
     (HaveTTData ? TTypeBaseOffsetSize : 0) +    // TType base offset size
     TTypeBaseOffset;                            // TType base offset
-  unsigned SizeAlign = (4 - TotalSize) & 3;
+  unsigned PadBytes = (4 - TotalSize) & 3;
 
   if (HaveTTData) {
     // Account for any extra padding that will be added to the call site table
     // length.
-    Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
-    SizeAlign = 0;
+    Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes,
+                           "@TType base offset");
+    PadBytes = 0;
   }
 
   bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
@@ -494,7 +506,9 @@ void EHStreamer::emitExceptionTable() {
     Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
 
     // Add extra padding if it wasn't added to the TType base offset.
-    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+    Asm->EmitPaddedULEB128(CallSiteTableLength,
+                           CallSiteTableLengthSize + PadBytes,
+                           "Call site table length");
 
     // Emit the landing pad site information.
     unsigned idx = 0;
@@ -547,7 +561,9 @@ void EHStreamer::emitExceptionTable() {
     Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
 
     // Add extra padding if it wasn't added to the TType base offset.
-    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+    Asm->EmitPaddedULEB128(CallSiteTableLength,
+                           CallSiteTableLengthSize + PadBytes,
+                           "Call site table length");
 
     unsigned Entry = 0;
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 080fdd14b467..7962b761d8de 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -1,4 +1,4 @@
-//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===//
+//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,17 +16,16 @@
 
 #include "AsmPrinterHandler.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Compiler.h"
 
 namespace llvm {
+
+class AsmPrinter;
 struct LandingPadInfo;
-class MachineModuleInfo;
 class MachineInstr;
-class MachineFunction;
+class MachineModuleInfo;
 class MCSymbol;
-class MCSymbolRefExpr;
-
-template <typename T>
-class SmallVectorImpl;
+template <typename T> class SmallVectorImpl;
 
 /// Emits exception handling directives.
 class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler {
@@ -45,11 +44,12 @@ protected:
   struct PadRange {
     // The index of the landing pad.
     unsigned PadIndex;
+
     // The index of the begin and end labels in the landing pad's label lists.
     unsigned RangeIndex;
   };
 
-  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+  using RangeMapType = DenseMap<MCSymbol *, PadRange>;
 
   /// Structure describing an entry in the actions table.
   struct ActionEntry {
@@ -66,6 +66,7 @@ protected:
 
     // LPad contains the landing pad start labels.
     const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
+
     unsigned Action;
   };
 
@@ -131,7 +132,7 @@ public:
   /// `false' otherwise.
   static bool callToNoUnwindFunction(const MachineInstr *MI);
 };
-}
 
-#endif
+} // end namespace llvm
 
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index c5795559fb7d..e459c02c9a6e 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
@@ -26,7 +27,6 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 035f1a0063aa..e0cc241dd23f 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -11,13 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Mangler.h"
@@ -26,7 +27,6 @@
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include <cctype>
 #include <cstddef>
 #include <cstdint>
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 5d485f213573..a6a8e84a949f 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "WinException.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/Dwarf.h"
@@ -20,6 +19,10 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Mangler.h"
@@ -30,15 +33,9 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCWin64EH.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
@@ -66,7 +63,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
   bool hasLandingPads = !MF->getLandingPads().empty();
   bool hasEHFunclets = MF->hasEHFunclets();
 
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
 
   shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI();
 
@@ -75,14 +72,14 @@ void WinException::beginFunction(const MachineFunction *MF) {
 
   EHPersonality Per = EHPersonality::Unknown;
   const Function *PerFn = nullptr;
-  if (F->hasPersonalityFn()) {
-    PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+  if (F.hasPersonalityFn()) {
+    PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
     Per = classifyEHPersonality(PerFn);
   }
 
-  bool forceEmitPersonality = F->hasPersonalityFn() &&
+  bool forceEmitPersonality = F.hasPersonalityFn() &&
                               !isNoOpWithoutInvoke(Per) &&
-                              F->needsUnwindTableEntry();
+                              F.needsUnwindTableEntry();
 
   shouldEmitPersonality =
       forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
@@ -101,7 +98,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
       // functions may still refer to it.
       const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
       StringRef FLinkageName =
-          GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
+          GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
       emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
     }
     shouldEmitLSDA = hasEHFunclets;
@@ -118,10 +115,10 @@ void WinException::endFunction(const MachineFunction *MF) {
   if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
     return;
 
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
   EHPersonality Per = EHPersonality::Unknown;
-  if (F->hasPersonalityFn())
-    Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
+  if (F.hasPersonalityFn())
+    Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
 
   // Get rid of any dead landing pads if we're not using funclets. In funclet
   // schemes, the landing pad is not actually reachable. It only exists so
@@ -173,8 +170,8 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
   // Give catches and cleanups a name based off of their parent function and
   // their funclet entry block's number.
   const MachineFunction *MF = MBB->getParent();
-  const Function *F = MF->getFunction();
-  StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+  const Function &F = MF->getFunction();
+  StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
   MCContext &Ctx = MF->getContext();
   StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
   return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
@@ -186,7 +183,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
                                 MCSymbol *Sym) {
   CurrentFuncletEntry = &MBB;
 
-  const Function *F = Asm->MF->getFunction();
+  const Function &F = Asm->MF->getFunction();
   // If a symbol was not provided for the funclet, invent one.
   if (!Sym) {
     Sym = getMCSymbolForMBB(Asm, &MBB);
@@ -201,7 +198,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
     // We want our funclet's entry point to be aligned such that no nops will be
     // present after the label.
     Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
-                       F);
+                       &F);
 
     // Now that we've emitted the alignment directive, point at our funclet.
     Asm->OutStreamer->EmitLabel(Sym);
@@ -218,8 +215,8 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
     const Function *PerFn = nullptr;
 
     // Determine which personality routine we are using for this funclet.
-    if (F->hasPersonalityFn())
-      PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+    if (F.hasPersonalityFn())
+      PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
     const MCSymbol *PersHandlerSym =
         TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI);
 
@@ -240,10 +237,10 @@ void WinException::endFunclet() {
 
   const MachineFunction *MF = Asm->MF;
   if (shouldEmitMoves || shouldEmitPersonality) {
-    const Function *F = MF->getFunction();
+    const Function &F = MF->getFunction();
     EHPersonality Per = EHPersonality::Unknown;
-    if (F->hasPersonalityFn())
-      Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
+    if (F.hasPersonalityFn())
+      Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
 
     // Emit an UNWIND_INFO struct describing the prologue.
     Asm->OutStreamer->EmitWinEHHandlerData();
@@ -252,7 +249,7 @@ void WinException::endFunclet() {
         !CurrentFuncletEntry->isCleanupFuncletEntry()) {
       // If this is a C++ catch funclet (or the parent function),
       // emit a reference to the LSDA for the parent function.
-      StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+      StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
       MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
           Twine("$cppxdata$", FuncLinkageName));
       Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
@@ -536,7 +533,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
   // Emit a label assignment with the SEH frame offset so we can use it for
   // llvm.x86.seh.recoverfp.
   StringRef FLinkageName =
-      GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
+      GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
   MCSymbol *ParentFrameOffset =
       Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
   const MCExpr *MCOffset =
@@ -631,11 +628,11 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
 }
 
 void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
-  const Function *F = MF->getFunction();
+  const Function &F = MF->getFunction();
   auto &OS = *Asm->OutStreamer;
   const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
 
-  StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+  StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
 
   SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
   MCSymbol *FuncInfoXData = nullptr;
@@ -941,8 +938,8 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
 /// indexed by state number instead of IP.
 void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
   MCStreamer &OS = *Asm->OutStreamer;
-  const Function *F = MF->getFunction();
-  StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
+  const Function &F = MF->getFunction();
+  StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
 
   bool VerboseAsm = OS.isVerboseAsm();
   auto AddComment = [&](const Twine &Comment) {
@@ -959,7 +956,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
   OS.EmitLabel(LSDALabel);
 
   const Function *Per =
-      dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+      dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
   StringRef PerName = Per->getName();
   int BaseState = -1;
   if (PerName == "_except_handler4") {
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index aa9c8e94d08a..7042bc997223 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1,4 +1,4 @@
-//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
+//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,31 +15,54 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/AtomicExpandUtils.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "atomic-expand"
 
 namespace {
+
   class AtomicExpand: public FunctionPass {
-    const TargetLowering *TLI;
+    const TargetLowering *TLI = nullptr;
+
   public:
     static char ID; // Pass identification, replacement for typeid
-    AtomicExpand() : FunctionPass(ID), TLI(nullptr) {
+
+    AtomicExpand() : FunctionPass(ID) {
       initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
     }
 
@@ -92,39 +115,41 @@ namespace {
     llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg);
   };
-}
+
+} // end anonymous namespace
 
 char AtomicExpand::ID = 0;
+
 char &llvm::AtomicExpandID = AtomicExpand::ID;
+
 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
                 false, false)
 
 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
 
-namespace {
 // Helper functions to retrieve the size of atomic instructions.
-unsigned getAtomicOpSize(LoadInst *LI) {
+static unsigned getAtomicOpSize(LoadInst *LI) {
   const DataLayout &DL = LI->getModule()->getDataLayout();
   return DL.getTypeStoreSize(LI->getType());
 }
 
-unsigned getAtomicOpSize(StoreInst *SI) {
+static unsigned getAtomicOpSize(StoreInst *SI) {
   const DataLayout &DL = SI->getModule()->getDataLayout();
   return DL.getTypeStoreSize(SI->getValueOperand()->getType());
 }
 
-unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
+static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
   const DataLayout &DL = RMWI->getModule()->getDataLayout();
   return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
 }
 
-unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
+static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
   const DataLayout &DL = CASI->getModule()->getDataLayout();
   return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
 }
 
 // Helper functions to retrieve the alignment of atomic instructions.
-unsigned getAtomicOpAlign(LoadInst *LI) {
+static unsigned getAtomicOpAlign(LoadInst *LI) {
   unsigned Align = LI->getAlignment();
   // In the future, if this IR restriction is relaxed, we should
   // return DataLayout::getABITypeAlignment when there's no align
@@ -133,7 +158,7 @@ unsigned getAtomicOpAlign(LoadInst *LI) {
   return Align;
 }
 
-unsigned getAtomicOpAlign(StoreInst *SI) {
+static unsigned getAtomicOpAlign(StoreInst *SI) {
   unsigned Align = SI->getAlignment();
   // In the future, if this IR restriction is relaxed, we should
   // return DataLayout::getABITypeAlignment when there's no align
@@ -142,7 +167,7 @@ unsigned getAtomicOpAlign(StoreInst *SI) {
   return Align;
 }
 
-unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
+static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
   // TODO(PR27168): This instruction has no alignment attribute, but unlike the
   // default alignment for load/store, the default here is to assume
   // it has NATURAL alignment, not DataLayout-specified alignment.
@@ -150,7 +175,7 @@ unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
   return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
 }
 
-unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
+static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
   // TODO(PR27168): same comment as above.
   const DataLayout &DL = CASI->getModule()->getDataLayout();
   return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
@@ -160,14 +185,12 @@ unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
 // and is of appropriate alignment, to be passed through for target
 // lowering. (Versus turning into a __atomic libcall)
 template <typename Inst>
-bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
+static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
   unsigned Size = getAtomicOpSize(I);
   unsigned Align = getAtomicOpAlign(I);
   return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
 }
 
-} // end anonymous namespace
-
 bool AtomicExpand::runOnFunction(Function &F) {
   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
   if (!TPC)
@@ -320,16 +343,10 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
   auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
 
   auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
-  // The trailing fence is emitted before the instruction instead of after
-  // because there is no easy way of setting Builder insertion point after
-  // an instruction. So we must erase it from the BB, and insert it back
-  // in the right place.
   // We have a guard here because not every atomic operation generates a
   // trailing fence.
-  if (TrailingFence) {
-    TrailingFence->removeFromParent();
-    TrailingFence->insertAfter(I);
-  }
+  if (TrailingFence)
+    TrailingFence->moveAfter(I);
 
   return (LeadingFence || TrailingFence);
 }
@@ -562,6 +579,7 @@ struct PartwordMaskValues {
   Value *Mask;
   Value *Inv_Mask;
 };
+
 } // end anonymous namespace
 
 /// This is a helper function which builds instructions to provide
@@ -580,7 +598,6 @@ struct PartwordMaskValues {
 ///       include only the part that would've been loaded from Addr.
 ///
 /// Inv_Mask: The inverse of Mask.
-
 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
                                            Type *ValueType, Value *Addr,
                                            unsigned WordSize) {
@@ -686,7 +703,6 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
 /// part of the value.
 void AtomicExpand::expandPartwordAtomicRMW(
     AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
-
   assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
 
   AtomicOrdering MemOpOrder = AI->getOrdering();
@@ -943,7 +959,6 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
   return NewCI;
 }
 
-
 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
   AtomicOrdering FailureOrder = CI->getFailureOrdering();
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index be93ff0dad29..d11f375b176e 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,21 +15,20 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/TargetTransformInfoImpl.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
-#include <utility>
+#include "llvm/Target/TargetMachine.h"
+
 using namespace llvm;
 
 // This flag is used by the template base class for BasicTTIImpl, and here to
 // provide a definition.
 cl::opt<unsigned>
-    llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
-                                    cl::desc("Threshold for partial unrolling"),
-                                    cl::Hidden);
+llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+                                cl::desc("Threshold for partial unrolling"),
+                                cl::Hidden);
 
 BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
     : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp
deleted file mode 100644
index 2c41b597843c..000000000000
--- a/lib/CodeGen/BranchCoalescing.cpp
+++ /dev/null
@@ -1,758 +0,0 @@
-//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Coalesce basic blocks guarded by the same branch condition into a single
-/// basic block.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "branch-coalescing"
-
-static cl::opt<cl::boolOrDefault>
-    EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
-                           cl::desc("enable coalescing of duplicate branches"));
-
-STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
-STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
-STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
-
-//===----------------------------------------------------------------------===//
-//                               BranchCoalescing
-//===----------------------------------------------------------------------===//
-///
-/// Improve scheduling by coalescing branches that depend on the same condition.
-/// This pass looks for blocks that are guarded by the same branch condition
-/// and attempts to merge the blocks together. Such opportunities arise from
-/// the expansion of select statements in the IR.
-///
-/// For example, consider the following LLVM IR:
-///
-/// %test = icmp eq i32 %x 0
-/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
-/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
-///
-/// This IR expands to the following machine code on PowerPC:
-///
-/// BB#0: derived from LLVM BB %entry
-///    Live Ins: %F1 %F3 %X6
-///        <SNIP1>
-///        %vreg0<def> = COPY %F1; F8RC:%vreg0
-///        %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
-///        %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
-///                    mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
-///        BCC 76, %vreg5, <BB#2>; CRRC:%vreg5
-///    Successors according to CFG: BB#1(?%) BB#2(?%)
-///
-/// BB#1: derived from LLVM BB %entry
-///    Predecessors according to CFG: BB#0
-///    Successors according to CFG: BB#2(?%)
-///
-/// BB#2: derived from LLVM BB %entry
-///    Predecessors according to CFG: BB#0 BB#1
-///        %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
-///                    F8RC:%vreg9,%vreg8,%vreg0
-///        <SNIP2>
-///        BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
-///    Successors according to CFG: BB#3(?%) BB#4(?%)
-///
-/// BB#3: derived from LLVM BB %entry
-///    Predecessors according to CFG: BB#2
-///    Successors according to CFG: BB#4(?%)
-///
-/// BB#4: derived from LLVM BB %entry
-///    Predecessors according to CFG: BB#2 BB#3
-///        %vreg13<def> = PHI %vreg12, <BB#3>, %vreg2, <BB#2>;
-///                     F8RC:%vreg13,%vreg12,%vreg2
-///        <SNIP3>
-///        BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
-///
-/// When this pattern is detected, branch coalescing will try to collapse
-/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3.
-///
-/// If all conditions are meet, IR should collapse to:
-///
-/// BB#0: derived from LLVM BB %entry
-///    Live Ins: %F1 %F3 %X6
-///        <SNIP1>
-///        %vreg0<def> = COPY %F1; F8RC:%vreg0
-///        %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
-///        %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
-///                     mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
-///        <SNIP2>
-///        BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
-///    Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%)
-///      BB#4(0x55555554 / 0x80000000 = 66.67%)
-///
-/// BB#1: derived from LLVM BB %entry
-///    Predecessors according to CFG: BB#0
-///    Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%)
-///
-/// BB#4: derived from LLVM BB %entry
-///    Predecessors according to CFG: BB#0 BB#1
-///        %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
-///                    F8RC:%vreg9,%vreg8,%vreg0
-///        %vreg13<def> = PHI %vreg12, <BB#1>, %vreg2, <BB#0>;
-///                     F8RC:%vreg13,%vreg12,%vreg2
-///        <SNIP3>
-///        BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
-///
-/// Branch Coalescing does not split blocks, it moves everything in the same
-/// direction ensuring it does not break use/definition semantics.
-///
-/// PHI nodes and its corresponding use instructions are moved to its successor
-/// block if there are no uses within the successor block PHI nodes.  PHI
-/// node ordering cannot be assumed.
-///
-/// Non-PHI can be moved up to the predecessor basic block or down to the
-/// successor basic block following any PHI instructions. Whether it moves
-/// up or down depends on whether the register(s) defined in the instructions
-/// are used in current block or in any PHI instructions at the beginning of
-/// the successor block.
-
-namespace {
-
-class BranchCoalescing : public MachineFunctionPass {
-  struct CoalescingCandidateInfo {
-    MachineBasicBlock *BranchBlock;       // Block containing the branch
-    MachineBasicBlock *BranchTargetBlock; // Block branched to
-    MachineBasicBlock *FallThroughBlock;  // Fall-through if branch not taken
-    SmallVector<MachineOperand, 4> Cond;
-    bool MustMoveDown;
-    bool MustMoveUp;
-
-    CoalescingCandidateInfo();
-    void clear();
-  };
-
-  MachineDominatorTree *MDT;
-  MachinePostDominatorTree *MPDT;
-  const TargetInstrInfo *TII;
-  MachineRegisterInfo *MRI;
-
-  void initialize(MachineFunction &F);
-  bool canCoalesceBranch(CoalescingCandidateInfo &Cand);
-  bool identicalOperands(ArrayRef<MachineOperand> OperandList1,
-                         ArrayRef<MachineOperand> OperandList2) const;
-  bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
-                          CoalescingCandidateInfo &TargetRegion) const;
-
-  static bool isBranchCoalescingEnabled() {
-    return EnableBranchCoalescing == cl::BOU_TRUE;
-  }
-
-public:
-  static char ID;
-
-  BranchCoalescing() : MachineFunctionPass(ID) {
-    initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<MachineDominatorTree>();
-    AU.addRequired<MachinePostDominatorTree>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  StringRef getPassName() const override { return "Branch Coalescing"; }
-
-  bool mergeCandidates(CoalescingCandidateInfo &SourceRegion,
-                       CoalescingCandidateInfo &TargetRegion);
-  bool canMoveToBeginning(const MachineInstr &MI,
-                          const MachineBasicBlock &MBB) const;
-  bool canMoveToEnd(const MachineInstr &MI,
-                    const MachineBasicBlock &MBB) const;
-  bool canMerge(CoalescingCandidateInfo &SourceRegion,
-                CoalescingCandidateInfo &TargetRegion) const;
-  void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB,
-                         MachineBasicBlock *TargetRegionMBB);
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // End anonymous namespace.
-
-char BranchCoalescing::ID = 0;
-char &llvm::BranchCoalescingID = BranchCoalescing::ID;
-
-INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE,
-                      "Branch Coalescing", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
-                    false, false)
-
-BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
-    : BranchBlock(nullptr), BranchTargetBlock(nullptr),
-      FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
-
-void BranchCoalescing::CoalescingCandidateInfo::clear() {
-  BranchBlock = nullptr;
-  BranchTargetBlock = nullptr;
-  FallThroughBlock = nullptr;
-  Cond.clear();
-  MustMoveDown = false;
-  MustMoveUp = false;
-}
-
-void BranchCoalescing::initialize(MachineFunction &MF) {
-  MDT = &getAnalysis<MachineDominatorTree>();
-  MPDT = &getAnalysis<MachinePostDominatorTree>();
-  TII = MF.getSubtarget().getInstrInfo();
-  MRI = &MF.getRegInfo();
-}
-
-///
-/// Analyze the branch statement to determine if it can be coalesced. This
-/// method analyses the branch statement for the given candidate to determine
-/// if it can be coalesced. If the branch can be coalesced, then the
-/// BranchTargetBlock and the FallThroughBlock are recorded in the specified
-/// Candidate.
-///
-///\param[in,out] Cand The coalescing candidate to analyze
-///\return true if and only if the branch can be coalesced, false otherwise
-///
-bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
-  DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
-               << " can be coalesced:");
-  MachineBasicBlock *FalseMBB = nullptr;
-
-  if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
-                         Cand.Cond)) {
-    DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
-    return false;
-  }
-
-  for (auto &I : Cand.BranchBlock->terminators()) {
-    DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
-    if (!I.isBranch())
-      continue;
-
-    if (I.getNumOperands() != I.getNumExplicitOperands()) {
-      DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
-                   << "\n");
-      return false;
-    }
-  }
-
-  if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
-    DEBUG(dbgs() << "EH Pad - skip\n");
-    return false;
-  }
-
-  // For now only consider triangles (i.e, BranchTargetBlock is set,
-  // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
-  if (!Cand.BranchTargetBlock || FalseMBB ||
-      !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) {
-    DEBUG(dbgs() << "Does not form a triangle - skip\n");
-    return false;
-  }
-
-  // Ensure there are only two successors
-  if (Cand.BranchBlock->succ_size() != 2) {
-    DEBUG(dbgs() << "Does not have 2 successors - skip\n");
-    return false;
-  }
-
-  // Sanity check - the block must be able to fall through
-  assert(Cand.BranchBlock->canFallThrough() &&
-         "Expecting the block to fall through!");
-
-  // We have already ensured there are exactly two successors to
-  // BranchBlock and that BranchTargetBlock is a successor to BranchBlock.
-  // Ensure the single fall though block is empty.
-  MachineBasicBlock *Succ =
-    (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock)
-    ? *Cand.BranchBlock->succ_rbegin()
-    : *Cand.BranchBlock->succ_begin();
-
-  assert(Succ && "Expecting a valid fall-through block\n");
-
-  if (!Succ->empty()) {
-      DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
-      return false;
-  }
-
-  if (!Succ->isSuccessor(Cand.BranchTargetBlock)) {
-      DEBUG(dbgs()
-            << "Successor of fall through block is not branch taken block\n");
-      return false;
-  }
-
-  Cand.FallThroughBlock = Succ;
-  DEBUG(dbgs() << "Valid Candidate\n");
-  return true;
-}
-
-///
-/// Determine if the two operand lists are identical
-///
-/// \param[in] OpList1 operand list
-/// \param[in] OpList2 operand list
-/// \return true if and only if the operands lists are identical
-///
-bool BranchCoalescing::identicalOperands(
-    ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
-
-  if (OpList1.size() != OpList2.size()) {
-    DEBUG(dbgs() << "Operand list is different size\n");
-    return false;
-  }
-
-  for (unsigned i = 0; i < OpList1.size(); ++i) {
-    const MachineOperand &Op1 = OpList1[i];
-    const MachineOperand &Op2 = OpList2[i];
-
-    DEBUG(dbgs() << "Op1: " << Op1 << "\n"
-                 << "Op2: " << Op2 << "\n");
-
-    if (Op1.isIdenticalTo(Op2)) {
-      DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
-      continue;
-    }
-
-    // If the operands are not identical, but are registers, check to see if the
-    // definition of the register produces the same value. If they produce the
-    // same value, consider them to be identical.
-    if (Op1.isReg() && Op2.isReg() &&
-        TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
-        TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
-      MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
-      MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
-      if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
-        DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def
-                     << " produce the same value!\n");
-      } else {
-        DEBUG(dbgs() << "Operands produce different values\n");
-        return false;
-      }
-    } else {
-      DEBUG(dbgs() << "The operands are not provably identical.\n");
-      return false;
-    }
-  }
-  return true;
-}
-
-///
-/// Moves ALL PHI instructions in SourceMBB to beginning of TargetMBB
-/// and update them to refer to the new block.  PHI node ordering
-/// cannot be assumed so it does not matter where the PHI instructions
-/// are moved to in TargetMBB.
-///
-/// \param[in] SourceMBB block to move PHI instructions from
-/// \param[in] TargetMBB block to move PHI instructions to
-///
-void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
-                                         MachineBasicBlock *TargetMBB) {
-
-  MachineBasicBlock::iterator MI = SourceMBB->begin();
-  MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI();
-
-  if (MI == ME) {
-    DEBUG(dbgs() << "SourceMBB contains no PHI instructions.\n");
-    return;
-  }
-
-  // Update all PHI instructions in SourceMBB and move to top of TargetMBB
-  for (MachineBasicBlock::iterator Iter = MI; Iter != ME; Iter++) {
-    MachineInstr &PHIInst = *Iter;
-    for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) {
-      MachineOperand &MO = PHIInst.getOperand(i);
-      if (MO.getMBB() == SourceMBB)
-        MO.setMBB(TargetMBB);
-    }
-  }
-  TargetMBB->splice(TargetMBB->begin(), SourceMBB, MI, ME);
-}
-
-///
-/// This function checks if MI can be moved to the beginning of the TargetMBB
-/// following PHI instructions. A MI instruction can be moved to beginning of
-/// the TargetMBB if there are no uses of it within the TargetMBB PHI nodes.
-///
-/// \param[in] MI the machine instruction to move.
-/// \param[in] TargetMBB the machine basic block to move to
-/// \return true if it is safe to move MI to beginning of TargetMBB,
-///         false otherwise.
-///
-bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
-                                          const MachineBasicBlock &TargetMBB
-                                          ) const {
-
-  DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of "
-        << TargetMBB.getNumber() << "\n");
-
-  for (auto &Def : MI.defs()) { // Looking at Def
-    for (auto &Use : MRI->use_instructions(Def.getReg())) {
-      if (Use.isPHI() && Use.getParent() == &TargetMBB) {
-        DEBUG(dbgs() << "    *** used in a PHI -- cannot move ***\n");
-       return false;
-      }
-    }
-  }
-
-  DEBUG(dbgs() << "  Safe to move to the beginning.\n");
-  return true;
-}
-
-///
-/// This function checks if MI can be moved to the end of the TargetMBB,
-/// immediately before the first terminator.  A MI instruction can be moved
-/// to then end of the TargetMBB if no PHI node defines what MI uses within
-/// it's own MBB.
-///
-/// \param[in] MI the machine instruction to move.
-/// \param[in] TargetMBB the machine basic block to move to
-/// \return true if it is safe to move MI to end of TargetMBB,
-///         false otherwise.
-///
-bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
-                                    const MachineBasicBlock &TargetMBB
-                                    ) const {
-
-  DEBUG(dbgs() << "Checking if " << MI << " can move to end of "
-        << TargetMBB.getNumber() << "\n");
-
-  for (auto &Use : MI.uses()) {
-    if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
-      MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
-      if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
-        DEBUG(dbgs() << "    *** Cannot move this instruction ***\n");
-        return false;
-      } else {
-        DEBUG(dbgs() << "    *** def is in another block -- safe to move!\n");
-      }
-    }
-  }
-
-  DEBUG(dbgs() << "  Safe to move to the end.\n");
-  return true;
-}
-
-///
-/// This method checks to ensure the two coalescing candidates follows the
-/// expected pattern required for coalescing.
-///
-/// \param[in] SourceRegion The candidate to move statements from
-/// \param[in] TargetRegion The candidate to move statements to
-/// \return true if all instructions in SourceRegion.BranchBlock can be merged
-/// into a block in TargetRegion; false otherwise.
-///
-bool BranchCoalescing::validateCandidates(
-    CoalescingCandidateInfo &SourceRegion,
-    CoalescingCandidateInfo &TargetRegion) const {
-
-  if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock)
-    llvm_unreachable("Expecting SourceRegion to immediately follow TargetRegion");
-  else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock))
-    llvm_unreachable("Expecting TargetRegion to dominate SourceRegion");
-  else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock))
-    llvm_unreachable("Expecting SourceRegion to post-dominate TargetRegion");
-  else if (!TargetRegion.FallThroughBlock->empty() ||
-           !SourceRegion.FallThroughBlock->empty())
-    llvm_unreachable("Expecting fall-through blocks to be empty");
-
-  return true;
-}
-
-///
-/// This method determines whether the two coalescing candidates can be merged.
-/// In order to be merged, all instructions must be able to
-///   1. Move to the beginning of the SourceRegion.BranchTargetBlock;
-///   2. Move to the end of the TargetRegion.BranchBlock.
-/// Merging involves moving the instructions in the
-/// TargetRegion.BranchTargetBlock (also SourceRegion.BranchBlock).
-///
-/// This function first try to move instructions from the
-/// TargetRegion.BranchTargetBlock down, to the beginning of the
-/// SourceRegion.BranchTargetBlock. This is not possible if any register defined
-/// in TargetRegion.BranchTargetBlock is used in a PHI node in the
-/// SourceRegion.BranchTargetBlock. In this case, check whether the statement
-/// can be moved up, to the end of the TargetRegion.BranchBlock (immediately
-/// before the branch statement). If it cannot move, then these blocks cannot
-/// be merged.
-///
-/// Note that there is no analysis for moving instructions past the fall-through
-/// blocks because they are confirmed to be empty. An assert is thrown if they
-/// are not.
-///
-/// \param[in] SourceRegion The candidate to move statements from
-/// \param[in] TargetRegion The candidate to move statements to
-/// \return true if all instructions in SourceRegion.BranchBlock can be merged
-///         into a block in TargetRegion, false otherwise.
-///
-bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
-                                CoalescingCandidateInfo &TargetRegion) const {
-  if (!validateCandidates(SourceRegion, TargetRegion))
-    return false;
-
-  // Walk through PHI nodes first and see if they force the merge into the
-  // SourceRegion.BranchTargetBlock.
-  for (MachineBasicBlock::iterator
-           I = SourceRegion.BranchBlock->instr_begin(),
-           E = SourceRegion.BranchBlock->getFirstNonPHI();
-       I != E; ++I) {
-    for (auto &Def : I->defs())
-      for (auto &Use : MRI->use_instructions(Def.getReg())) {
-        if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) {
-          DEBUG(dbgs() << "PHI " << *I << " defines register used in another "
-                          "PHI within branch target block -- can't merge\n");
-          NumPHINotMoved++;
-          return false;
-        }
-        if (Use.getParent() == SourceRegion.BranchBlock) {
-          DEBUG(dbgs() << "PHI " << *I
-                       << " defines register used in this "
-                          "block -- all must move down\n");
-          SourceRegion.MustMoveDown = true;
-        }
-      }
-  }
-
-  // Walk through the MI to see if they should be merged into
-  // TargetRegion.BranchBlock (up) or SourceRegion.BranchTargetBlock (down)
-  for (MachineBasicBlock::iterator
-           I = SourceRegion.BranchBlock->getFirstNonPHI(),
-           E = SourceRegion.BranchBlock->end();
-       I != E; ++I) {
-    if (!canMoveToBeginning(*I, *SourceRegion.BranchTargetBlock)) {
-      DEBUG(dbgs() << "Instruction " << *I
-                   << " cannot move down - must move up!\n");
-      SourceRegion.MustMoveUp = true;
-    }
-    if (!canMoveToEnd(*I, *TargetRegion.BranchBlock)) {
-      DEBUG(dbgs() << "Instruction " << *I
-                   << " cannot move up - must move down!\n");
-      SourceRegion.MustMoveDown = true;
-    }
-  }
-
-  return (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) ? false : true;
-}
-
-/// Merge the instructions from SourceRegion.BranchBlock,
-/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into
-/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and
-/// TargetRegion.FallThroughBlock respectively.
-///
-/// The successors for blocks in TargetRegion will be updated to use the
-/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion
-/// will be removed from the function.
-///
-/// A region consists of a BranchBlock, a FallThroughBlock, and a
-/// BranchTargetBlock. Branch coalesce works on patterns where the
-/// TargetRegion's BranchTargetBlock must also be the SourceRegions's
-/// BranchBlock.
-///
-///  Before mergeCandidates:
-///
-///  +---------------------------+
-///  |  TargetRegion.BranchBlock |
-///  +---------------------------+
-///     /        |
-///    /   +--------------------------------+
-///   |    |  TargetRegion.FallThroughBlock |
-///    \   +--------------------------------+
-///     \        |
-///  +----------------------------------+
-///  |  TargetRegion.BranchTargetBlock  |
-///  |  SourceRegion.BranchBlock        |
-///  +----------------------------------+
-///     /        |
-///    /   +--------------------------------+
-///   |    |  SourceRegion.FallThroughBlock |
-///    \   +--------------------------------+
-///     \        |
-///  +----------------------------------+
-///  |  SourceRegion.BranchTargetBlock  |
-///  +----------------------------------+
-///
-///  After mergeCandidates:
-///
-///  +-----------------------------+
-///  |  TargetRegion.BranchBlock   |
-///  |  SourceRegion.BranchBlock   |
-///  +-----------------------------+
-///     /        |
-///    /   +---------------------------------+
-///   |    |  TargetRegion.FallThroughBlock  |
-///   |    |  SourceRegion.FallThroughBlock  |
-///    \   +---------------------------------+
-///     \        |
-///  +----------------------------------+
-///  |  SourceRegion.BranchTargetBlock  |
-///  +----------------------------------+
-///
-/// \param[in] SourceRegion The candidate to move blocks from
-/// \param[in] TargetRegion The candidate to move blocks to
-///
-bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
-                                       CoalescingCandidateInfo &TargetRegion) {
-
-  if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) {
-    llvm_unreachable("Cannot have both MustMoveDown and MustMoveUp set!");
-    return false;
-  }
-
-  if (!validateCandidates(SourceRegion, TargetRegion))
-    return false;
-
-  // Start the merging process by first handling the BranchBlock.
-  // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block
-  moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
-
-  // Move remaining instructions in SourceRegion.BranchBlock into
-  // TargetRegion.BranchBlock
-  MachineBasicBlock::iterator firstInstr =
-      SourceRegion.BranchBlock->getFirstNonPHI();
-  MachineBasicBlock::iterator lastInstr =
-      SourceRegion.BranchBlock->getFirstTerminator();
-
-  MachineBasicBlock *Source = SourceRegion.MustMoveDown
-                                  ? SourceRegion.BranchTargetBlock
-                                  : TargetRegion.BranchBlock;
-
-  MachineBasicBlock::iterator Target =
-      SourceRegion.MustMoveDown
-          ? SourceRegion.BranchTargetBlock->getFirstNonPHI()
-          : TargetRegion.BranchBlock->getFirstTerminator();
-
-  Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr);
-
-  // Once PHI and instructions have been moved we need to clean up the
-  // control flow.
-
-  // Remove SourceRegion.FallThroughBlock before transferring successors of
-  // SourceRegion.BranchBlock to TargetRegion.BranchBlock.
-  SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock);
-  TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs(
-      SourceRegion.BranchBlock);
-  // Update branch in TargetRegion.BranchBlock to jump to
-  // SourceRegion.BranchTargetBlock
-  // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock.
-  TargetRegion.BranchBlock->ReplaceUsesOfBlockWith(
-      SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
-  // Remove the branch statement(s) in SourceRegion.BranchBlock
-  MachineBasicBlock::iterator I =
-      SourceRegion.BranchBlock->terminators().begin();
-  while (I != SourceRegion.BranchBlock->terminators().end()) {
-    MachineInstr &CurrInst = *I;
-    ++I;
-    if (CurrInst.isBranch())
-      CurrInst.eraseFromParent();
-  }
-
-  // Fall-through block should be empty since this is part of the condition
-  // to coalesce the branches.
-  assert(TargetRegion.FallThroughBlock->empty() &&
-         "FallThroughBlocks should be empty!");
-
-  // Transfer successor information and move PHIs down to the
-  // branch-taken block.
-  TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs(
-      SourceRegion.FallThroughBlock);
-  TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock);
-
-  // Remove the blocks from the function.
-  assert(SourceRegion.BranchBlock->empty() &&
-         "Expecting branch block to be empty!");
-  SourceRegion.BranchBlock->eraseFromParent();
-
-  assert(SourceRegion.FallThroughBlock->empty() &&
-         "Expecting fall-through block to be empty!\n");
-  SourceRegion.FallThroughBlock->eraseFromParent();
-
-  NumBlocksCoalesced++;
-  return true;
-}
-
-bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
-
-  if (skipFunction(*MF.getFunction()) || MF.empty() ||
-      !isBranchCoalescingEnabled())
-    return false;
-
-  bool didSomething = false;
-
-  DEBUG(dbgs() << "******** Branch Coalescing ********\n");
-  initialize(MF);
-
-  DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
-
-  CoalescingCandidateInfo Cand1, Cand2;
-  // Walk over blocks and find candidates to merge
-  // Continue trying to merge with the first candidate found, as long as merging
-  // is successfull.
-  for (MachineBasicBlock &MBB : MF) {
-    bool MergedCandidates = false;
-    do {
-      MergedCandidates = false;
-      Cand1.clear();
-      Cand2.clear();
-
-      Cand1.BranchBlock = &MBB;
-
-      // If unable to coalesce the branch, then continue to next block
-      if (!canCoalesceBranch(Cand1))
-        break;
-
-      Cand2.BranchBlock = Cand1.BranchTargetBlock;
-      if (!canCoalesceBranch(Cand2))
-        break;
-
-      // Sanity check
-      // The branch-taken block of the second candidate should post-dominate the
-      // first candidate
-      assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
-             "Branch-taken block should post-dominate first candidate");
-
-      if (!identicalOperands(Cand1.Cond, Cand2.Cond)) {
-        DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and "
-                     << Cand2.BranchBlock->getNumber()
-                     << " have different branches\n");
-        break;
-      }
-      if (!canMerge(Cand2, Cand1)) {
-        DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber()
-                     << " and " << Cand2.BranchBlock->getNumber() << "\n");
-        NumBlocksNotCoalesced++;
-        continue;
-      }
-      DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber()
-                   << " and " << Cand1.BranchTargetBlock->getNumber() << "\n");
-      MergedCandidates = mergeCandidates(Cand2, Cand1);
-      if (MergedCandidates)
-        didSomething = true;
-
-      DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n");
-    } while (MergedCandidates);
-  }
-
-#ifndef NDEBUG
-  // Verify MF is still valid after branch coalescing
-  if (didSomething)
-    MF.verify(nullptr, "Error in code produced by branch coalescing");
-#endif // NDEBUG
-
-  DEBUG(dbgs() << "Finished Branch Coalescing\n");
-  return didSomething;
-}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 3c439e66944b..7f358a679366 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -19,27 +19,35 @@
 
 #include "BranchFolding.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BlockFrequency.h"
@@ -48,10 +56,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 #include <cstddef>
 #include <iterator>
@@ -81,8 +86,8 @@ TailMergeThreshold("tail-merge-threshold",
 // TODO: This should be replaced with a target query.
 static cl::opt<unsigned>
 TailMergeSize("tail-merge-size",
-          cl::desc("Min number of instructions to consider tail merging"),
-                              cl::init(3), cl::Hidden);
+              cl::desc("Min number of instructions to consider tail merging"),
+              cl::init(3), cl::Hidden);
 
 namespace {
 
@@ -106,13 +111,14 @@ namespace {
 } // end anonymous namespace
 
 char BranchFolderPass::ID = 0;
+
 char &llvm::BranchFolderPassID = BranchFolderPass::ID;
 
 INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,
                 "Control Flow Optimizer", false, false)
 
 bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
@@ -365,15 +371,37 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
   return TailLen;
 }
 
-void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
-                                           MachineBasicBlock *NewDest) {
-  TII->ReplaceTailWithBranchTo(OldInst, NewDest);
-
+void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                           MachineBasicBlock &NewDest) {
   if (UpdateLiveIns) {
-    NewDest->clearLiveIns();
-    computeLiveIns(LiveRegs, *MRI, *NewDest);
+    // OldInst should always point to an instruction.
+    MachineBasicBlock &OldMBB = *OldInst->getParent();
+    LiveRegs.clear();
+    LiveRegs.addLiveOuts(OldMBB);
+    // Move backward to the place where will insert the jump.
+    MachineBasicBlock::iterator I = OldMBB.end();
+    do {
+      --I;
+      LiveRegs.stepBackward(*I);
+    } while (I != OldInst);
+
+    // Merging the tails may have switched some undef operand to non-undef ones.
+    // Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the
+    // register.
+    for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) {
+      // We computed the liveins with computeLiveIn earlier and should only see
+      // full registers:
+      assert(P.LaneMask == LaneBitmask::getAll() &&
+             "Can only handle full register.");
+      MCPhysReg Reg = P.PhysReg;
+      if (!LiveRegs.available(*MRI, Reg))
+        continue;
+      DebugLoc DL;
+      BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
+    }
   }
 
+  TII->ReplaceTailWithBranchTo(OldInst, &NewDest);
   ++NumTailMerge;
 }
 
@@ -408,7 +436,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
   MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
 
   if (UpdateLiveIns)
-    computeLiveIns(LiveRegs, *MRI, *NewMBB);
+    computeAndAddLiveIns(LiveRegs, *NewMBB);
 
   // Add the new block to the funclet.
   const auto &FuncletI = FuncletMembership.find(&CurMBB);
@@ -585,8 +613,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
   CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
   if (CommonTailLen == 0)
     return false;
-  DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
-               << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+  DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)
+               << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen
                << '\n');
 
   // It's almost always profitable to merge any number of non-terminator
@@ -657,7 +685,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
   // branch instruction, which is likely to be smaller than the 2
   // instructions that would be deleted in the merge.
   MachineFunction *MF = MBB1->getParent();
-  return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() &&
+  return EffectiveTailLen >= 2 && MF->getFunction().optForSize() &&
          (I1 == MBB1->begin() || I2 == MBB2->begin());
 }
 
@@ -742,7 +770,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
     SameTails[commonTailIndex].getTailStartPos();
   MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
 
-  DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+  DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "
                << maxCommonTailLength);
 
   // If the split block unconditionally falls-thru to SuccBB, it will be
@@ -766,43 +794,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
   return true;
 }
 
-void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
-  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
-
-  std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
-  for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
-    if (i != commonTailIndex)
-      NextCommonInsts[i] = SameTails[i].getTailStartPos();
-    else {
-      assert(SameTails[i].getTailStartPos() == MBB->begin() &&
-          "MBB is not a common tail only block");
-    }
-  }
-
-  for (auto &MI : *MBB) {
-    if (MI.isDebugValue())
-      continue;
-    DebugLoc DL = MI.getDebugLoc();
-    for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
-      if (i == commonTailIndex)
-        continue;
-
-      auto &Pos = NextCommonInsts[i];
-      assert(Pos != SameTails[i].getBlock()->end() &&
-          "Reached BB end within common tail");
-      while (Pos->isDebugValue()) {
-        ++Pos;
-        assert(Pos != SameTails[i].getBlock()->end() &&
-            "Reached BB end within common tail");
-      }
-      assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
-      DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
-      NextCommonInsts[i] = ++Pos;
-    }
-    MI.setDebugLoc(DL);
-  }
-}
-
 static void
 mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
                 MachineBasicBlock &MBBCommon) {
@@ -853,6 +844,67 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
   }
 }
 
+void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
+  MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+  std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
+  for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
+    if (i != commonTailIndex) {
+      NextCommonInsts[i] = SameTails[i].getTailStartPos();
+      mergeOperations(SameTails[i].getTailStartPos(), *MBB);
+    } else {
+      assert(SameTails[i].getTailStartPos() == MBB->begin() &&
+          "MBB is not a common tail only block");
+    }
+  }
+
+  for (auto &MI : *MBB) {
+    if (MI.isDebugValue())
+      continue;
+    DebugLoc DL = MI.getDebugLoc();
+    for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
+      if (i == commonTailIndex)
+        continue;
+
+      auto &Pos = NextCommonInsts[i];
+      assert(Pos != SameTails[i].getBlock()->end() &&
+          "Reached BB end within common tail");
+      while (Pos->isDebugValue()) {
+        ++Pos;
+        assert(Pos != SameTails[i].getBlock()->end() &&
+            "Reached BB end within common tail");
+      }
+      assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
+      DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
+      NextCommonInsts[i] = ++Pos;
+    }
+    MI.setDebugLoc(DL);
+  }
+
+  if (UpdateLiveIns) {
+    LivePhysRegs NewLiveIns(*TRI);
+    computeLiveIns(NewLiveIns, *MBB);
+
+    // The flag merging may lead to some register uses no longer using the
+    // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.
+    for (MachineBasicBlock *Pred : MBB->predecessors()) {
+      LiveRegs.init(*TRI);
+      LiveRegs.addLiveOuts(*Pred);
+      MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
+      for (unsigned Reg : NewLiveIns) {
+        if (!LiveRegs.available(*MRI, Reg))
+          continue;
+        DebugLoc DL;
+        BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
+                Reg);
+      }
+    }
+
+    MBB->clearLiveIns();
+    addLiveIns(*MBB, NewLiveIns);
+  }
+}
+
 // See if any of the blocks in MergePotentials (which all have SuccBB as a
 // successor, or all have no successor if it is null) can be tail-merged.
 // If there is a successor, any blocks in MergePotentials that are not
@@ -868,20 +920,17 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
   bool MadeChange = false;
 
   DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
-        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
-          dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
-                 << (i == e-1 ? "" : ", ");
-        dbgs() << "\n";
-        if (SuccBB) {
-          dbgs() << "  with successor BB#" << SuccBB->getNumber() << '\n';
+        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs()
+        << printMBBReference(*MergePotentials[i].getBlock())
+        << (i == e - 1 ? "" : ", ");
+        dbgs() << "\n"; if (SuccBB) {
+          dbgs() << "  with successor " << printMBBReference(*SuccBB) << '\n';
           if (PredBB)
-            dbgs() << "  which has fall-through from BB#"
-                   << PredBB->getNumber() << "\n";
-        }
-        dbgs() << "Looking for common tails of at least "
-               << MinCommonTailLength << " instruction"
-               << (MinCommonTailLength == 1 ? "" : "s") << '\n';
-       );
+            dbgs() << "  which has fall-through from "
+                   << printMBBReference(*PredBB) << "\n";
+        } dbgs() << "Looking for common tails of at least "
+                 << MinCommonTailLength << " instruction"
+                 << (MinCommonTailLength == 1 ? "" : "s") << '\n';);
 
   // Sort by hash value so that blocks with identical end sequences sort
   // together.
@@ -955,22 +1004,21 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
     // Recompute common tail MBB's edge weights and block frequency.
     setCommonTailEdgeWeights(*MBB);
 
-    // Merge debug locations across identical instructions for common tail.
-    MergeCommonTailDebugLocs(commonTailIndex);
+    // Merge debug locations, MMOs and undef flags across identical instructions
+    // for common tail.
+    mergeCommonTails(commonTailIndex);
 
     // MBB is common tail.  Adjust all other BB's to jump to this one.
     // Traversal must be forwards so erases work.
-    DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+    DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)
                  << " for ");
     for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
       if (commonTailIndex == i)
         continue;
-      DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
-                   << (i == e-1 ? "" : ", "));
-      // Merge operations (MMOs, undef flags)
-      mergeOperations(SameTails[i].getTailStartPos(), *MBB);
+      DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())
+                   << (i == e - 1 ? "" : ", "));
       // Hack the end off BB i, making it jump to BB commonTailIndex instead.
-      ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+      replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);
       // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
       MergePotentials.erase(SameTails[i].getMPIter());
     }
@@ -1463,7 +1511,7 @@ ReoptimizeBlock:
   }
 
   if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
-      MF.getFunction()->optForSize()) {
+      MF.getFunction().optForSize()) {
     // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
     // direction, thereby defeating careful block placement and regressing
     // performance. Therefore, only consider this for optsize functions.
@@ -1819,7 +1867,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
   if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))
     return MBB->end();
 
-
   // Find out what registers are live. Note this routine is ignoring other live
   // registers which are only used by instructions in successor blocks.
   for (const MachineOperand &MO : PI->operands()) {
@@ -1921,7 +1968,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
           //
           // BB2:
           // r1 = op2, ...
-          //    = op3, r1<kill>
+          //    = op3, killed r1
           IsSafe = false;
           break;
         }
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 92681137e4c6..0f0952550137 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -1,4 +1,4 @@
-//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
+//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,20 +10,27 @@
 #ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
 #define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
 #include <vector>
 
 namespace llvm {
-  class MachineBlockFrequencyInfo;
-  class MachineBranchProbabilityInfo;
-  class MachineFunction;
-  class MachineModuleInfo;
-  class MachineLoopInfo;
-  class TargetInstrInfo;
-  class TargetRegisterInfo;
+
+class BasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineLoopInfo;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class raw_ostream;
+class TargetInstrInfo;
+class TargetRegisterInfo;
 
   class LLVM_LIBRARY_VISIBILITY BranchFolder {
   public:
@@ -49,6 +56,7 @@ namespace llvm {
     class MergePotentialsElt {
       unsigned Hash;
       MachineBasicBlock *Block;
+
     public:
       MergePotentialsElt(unsigned h, MachineBasicBlock *b)
         : Hash(h), Block(b) {}
@@ -62,7 +70,9 @@ namespace llvm {
 
       bool operator<(const MergePotentialsElt &) const;
     };
-    typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+
+    using MPIterator = std::vector<MergePotentialsElt>::iterator;
+
     std::vector<MergePotentialsElt> MergePotentials;
     SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
     DenseMap<const MachineBasicBlock *, int> FuncletMembership;
@@ -70,6 +80,7 @@ namespace llvm {
     class SameTailElt {
       MPIterator MPIter;
       MachineBasicBlock::iterator TailStartPos;
+
     public:
       SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
         : MPIter(mp), TailStartPos(tsp) {}
@@ -77,18 +88,23 @@ namespace llvm {
       MPIterator getMPIter() const {
         return MPIter;
       }
+
       MergePotentialsElt &getMergePotentialsElt() const {
         return *getMPIter();
       }
+
       MachineBasicBlock::iterator getTailStartPos() const {
         return TailStartPos;
       }
+
       unsigned getHash() const {
         return getMergePotentialsElt().getHash();
       }
+
       MachineBasicBlock *getBlock() const {
         return getMergePotentialsElt().getBlock();
       }
+
       bool tailIsWholeBlock() const {
         return TailStartPos == getBlock()->begin();
       }
@@ -96,6 +112,7 @@ namespace llvm {
       void setBlock(MachineBasicBlock *MBB) {
         getMergePotentialsElt().setBlock(MBB);
       }
+
       void setTailStartPos(MachineBasicBlock::iterator Pos) {
         TailStartPos = Pos;
       }
@@ -120,6 +137,7 @@ namespace llvm {
     class MBFIWrapper {
     public:
       MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
+
       BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
       void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
       raw_ostream &printBlockFreq(raw_ostream &OS,
@@ -146,8 +164,8 @@ namespace llvm {
 
     /// Delete the instruction OldInst and everything after it, replacing it
     /// with an unconditional branch to NewDest.
-    void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
-                                 MachineBasicBlock *NewDest);
+    void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                 MachineBasicBlock &NewDest);
 
     /// Given a machine basic block and an iterator into it, split the MBB so
     /// that the part before the iterator falls into the part starting at the
@@ -182,8 +200,8 @@ namespace llvm {
                                    unsigned &commonTailIndex);
 
     /// Create merged DebugLocs of identical instructions across SameTails and
-    /// assign it to the instruction in common tail.
-    void MergeCommonTailDebugLocs(unsigned commonTailIndex);
+    /// assign it to the instruction in common tail; merge MMOs and undef flags.
+    void mergeCommonTails(unsigned commonTailIndex);
 
     bool OptimizeBranches(MachineFunction &MF);
 
@@ -203,6 +221,7 @@ namespace llvm {
     /// the function, move the instructions before MBB terminator if it's legal.
     bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
   };
-}
 
-#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 27ee12c4c5ff..0d87f142c7cc 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -1,4 +1,4 @@
-//===-- BranchRelaxation.cpp ----------------------------------------------===//
+//===- BranchRelaxation.cpp -----------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,14 +10,25 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
 
 using namespace llvm;
 
@@ -30,6 +41,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
 #define BRANCH_RELAX_NAME "Branch relaxation pass"
 
 namespace {
+
 class BranchRelaxation : public MachineFunctionPass {
   /// BasicBlockInfo - Information about the offset and size of a single
   /// basic block.
@@ -38,16 +50,16 @@ class BranchRelaxation : public MachineFunctionPass {
     /// of this basic block.
     ///
     /// The offset is always aligned as required by the basic block.
-    unsigned Offset;
+    unsigned Offset = 0;
 
     /// Size - Size of the basic block in bytes.  If the block contains
     /// inline assembly, this is a worst case estimate.
     ///
     /// The size does not include any alignment padding whether from the
     /// beginning of the block, or from an aligned jump table at the end.
-    unsigned Size;
+    unsigned Size = 0;
 
-    BasicBlockInfo() : Offset(0), Size(0) {}
+    BasicBlockInfo() = default;
 
     /// Compute the offset immediately following this block. \p MBB is the next
     /// block.
@@ -95,18 +107,18 @@ class BranchRelaxation : public MachineFunctionPass {
 
 public:
   static char ID;
-  BranchRelaxation() : MachineFunctionPass(ID) { }
+
+  BranchRelaxation() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
-  StringRef getPassName() const override {
-    return BRANCH_RELAX_NAME;
-  }
+  StringRef getPassName() const override { return BRANCH_RELAX_NAME; }
 };
 
-}
+} // end anonymous namespace
 
 char BranchRelaxation::ID = 0;
+
 char &llvm::BranchRelaxationPassID = BranchRelaxation::ID;
 
 INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false)
@@ -131,7 +143,7 @@ void BranchRelaxation::verify() {
 LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
   for (auto &MBB : *MF) {
     const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
-    dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+    dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
            << format("size=%#x\n", BBI.Size);
   }
 }
@@ -196,7 +208,7 @@ void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
   }
 }
 
-  /// Insert a new empty basic block and insert it after \BB
+/// Insert a new empty basic block and insert it after \BB
 MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) {
   // Create a new MBB for the code after the OrigBB.
   MachineBasicBlock *NewBB =
@@ -233,7 +245,6 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
   // Insert an entry into BlockInfo to align it properly with the block numbers.
   BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
 
-
   NewBB->transferSuccessors(OrigBB);
   OrigBB->addSuccessor(NewBB);
   OrigBB->addSuccessor(DestBB);
@@ -259,7 +270,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
 
   // Need to fix live-in lists if we track liveness.
   if (TRI->trackLivenessAfterRegAlloc(*MF))
-    computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB);
+    computeAndAddLiveIns(LiveRegs, *NewBB);
 
   ++NumSplit;
 
@@ -276,13 +287,10 @@ bool BranchRelaxation::isBlockInRange(
   if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
     return true;
 
-  DEBUG(
-    dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber()
-           << " from BB#" << MI.getParent()->getNumber()
-           << " to " << DestOffset
-           << " offset " << DestOffset - BrOffset
-           << '\t' << MI
-  );
+  DEBUG(dbgs() << "Out of range branch to destination "
+               << printMBBReference(DestBB) << " from "
+               << printMBBReference(*MI.getParent()) << " to " << DestOffset
+               << " offset " << DestOffset - BrOffset << '\t' << MI);
 
   return false;
 }
@@ -348,16 +356,16 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
 
     // Need to fix live-in lists if we track liveness.
     if (TRI->trackLivenessAfterRegAlloc(*MF))
-      computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB);
+      computeAndAddLiveIns(LiveRegs, NewBB);
   }
 
   // We now have an appropriate fall-through block in place (either naturally or
   // just created), so we can invert the condition.
   MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
 
-  DEBUG(dbgs() << "  Insert B to BB#" << TBB->getNumber()
-               << ", invert condition and change dest. to BB#"
-               << NextBB.getNumber() << '\n');
+  DEBUG(dbgs() << "  Insert B to " << printMBBReference(*TBB)
+               << ", invert condition and change dest. to "
+               << printMBBReference(NextBB) << '\n');
 
   unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;
 
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 7f3c6da91268..07ba5d36cc96 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -4,7 +4,6 @@ add_llvm_library(LLVMCodeGen
   Analysis.cpp
   AtomicExpandPass.cpp
   BasicTargetTransformInfo.cpp
-  BranchCoalescing.cpp
   BranchFolding.cpp
   BranchRelaxation.cpp
   BuiltinGCs.cpp
@@ -12,7 +11,6 @@ add_llvm_library(LLVMCodeGen
   CallingConvLower.cpp
   CodeGen.cpp
   CodeGenPrepare.cpp
-  CountingFunctionInserter.cpp
   CriticalAntiDepBreaker.cpp
   DeadMachineInstructionElim.cpp
   DetectDeadLanes.cpp
@@ -22,6 +20,7 @@ add_llvm_library(LLVMCodeGen
   EdgeBundles.cpp
   ExecutionDepsFix.cpp
   ExpandISelPseudos.cpp
+  ExpandMemCmp.cpp
   ExpandPostRAPseudos.cpp
   ExpandReductions.cpp
   FaultMaps.cpp
@@ -43,7 +42,7 @@ add_llvm_library(LLVMCodeGen
   LexicalScopes.cpp
   LiveDebugValues.cpp
   LiveDebugVariables.cpp
-  LiveIntervalAnalysis.cpp
+  LiveIntervals.cpp
   LiveInterval.cpp
   LiveIntervalUnion.cpp
   LivePhysRegs.cpp
@@ -77,6 +76,7 @@ add_llvm_library(LLVMCodeGen
   MachineLoopInfo.cpp
   MachineModuleInfo.cpp
   MachineModuleInfoImpls.cpp
+  MachineOperand.cpp
   MachineOptimizationRemarkEmitter.cpp
   MachineOutliner.cpp
   MachinePassRegistry.cpp
@@ -114,6 +114,7 @@ add_llvm_library(LLVMCodeGen
   RegisterPressure.cpp
   RegisterScavenging.cpp
   RenameIndependentSubregs.cpp
+  MIRCanonicalizerPass.cpp
   RegisterUsageInfo.cpp
   RegUsageInfoCollector.cpp
   RegUsageInfoPropagate.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index c2ced19458ed..b8920a601938 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -1,4 +1,4 @@
-//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//===- CalcSpillWeights.cpp -----------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,17 +8,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <tuple>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "calcspillweights"
@@ -64,13 +70,24 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
     return sub == hsub ? hreg : 0;
 
   const TargetRegisterClass *rc = mri.getRegClass(reg);
+  if (!tri.enableMultipleCopyHints()) {
+    // Only allow physreg hints in rc.
+    if (sub == 0)
+      return rc->contains(hreg) ? hreg : 0;
+
+    // reg:sub should match the physreg hreg.
+    return tri.getMatchingSuperReg(hreg, sub, rc);
+  }
+
+  unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
+  if (rc->contains(CopiedPReg))
+    return CopiedPReg;
 
-  // Only allow physreg hints in rc.
-  if (sub == 0)
-    return rc->contains(hreg) ? hreg : 0;
+  // Check if reg:sub matches so that a super register could be hinted.
+  if (sub)
+    return tri.getMatchingSuperReg(CopiedPReg, sub, rc);
 
-  // reg:sub should match the physreg hreg.
-  return tri.getMatchingSuperReg(hreg, sub, rc);
+  return 0;
 }
 
 // Check if all values in LI are rematerializable
@@ -127,8 +144,21 @@ static bool isRematerializable(const LiveInterval &LI,
   return true;
 }
 
-void
-VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
+void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
+  float weight = weightCalcHelper(li);
+  // Check if unspillable.
+  if (weight < 0)
+    return;
+  li.weight = weight;
+}
+
+float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start,
+                                   SlotIndex end) {
+  return weightCalcHelper(li, &start, &end);
+}
+
+float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
+                                       SlotIndex *end) {
   MachineRegisterInfo &mri = MF.getRegInfo();
   const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
   MachineBasicBlock *mbb = nullptr;
@@ -138,20 +168,73 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
   unsigned numInstr = 0; // Number of instructions using li
   SmallPtrSet<MachineInstr*, 8> visited;
 
-  // Find the best physreg hint and the best virtreg hint.
-  float bestPhys = 0, bestVirt = 0;
-  unsigned hintPhys = 0, hintVirt = 0;
-
-  // Don't recompute a target specific hint.
-  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+  std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
 
   // Don't recompute spill weight for an unspillable register.
   bool Spillable = li.isSpillable();
 
+  bool localSplitArtifact = start && end;
+
+  // Do not update future local split artifacts.
+  bool updateLI = !localSplitArtifact;
+
+  if (localSplitArtifact) {
+    MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end);
+    assert(localMBB == LIS.getMBBFromIndex(*start) &&
+           "start and end are expected to be in the same basic block");
+
+    // Local split artifact will have 2 additional copy instructions and they
+    // will be in the same BB.
+    // localLI = COPY other
+    // ...
+    // other   = COPY localLI
+    totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
+    totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
+
+    numInstr += 2;
+  }
+
+  // CopyHint is a sortable hint derived from a COPY instruction.
+  struct CopyHint {
+    unsigned Reg;
+    float Weight;
+    bool IsPhys;
+    unsigned HintOrder;
+    CopyHint(unsigned R, float W, bool P, unsigned HR) :
+      Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {}
+    bool operator<(const CopyHint &rhs) const {
+      // Always prefer any physreg hint.
+      if (IsPhys != rhs.IsPhys)
+        return (IsPhys && !rhs.IsPhys);
+      if (Weight != rhs.Weight)
+        return (Weight > rhs.Weight);
+
+      // This is just a temporary way to achive NFC for targets that don't
+      // enable multiple copy hints. HintOrder should be removed when all
+      // targets return true in enableMultipleCopyHints().
+      return (HintOrder < rhs.HintOrder);
+
+#if 0 // Should replace the HintOrder check, see above.
+      // (just for the purpose of maintaining the set)
+      return Reg < rhs.Reg;
+#endif
+    }
+  };
+  std::set<CopyHint> CopyHints;
+
+  // Temporary: see comment for HintOrder above.
+  unsigned CopyHintOrder = 0;
   for (MachineRegisterInfo::reg_instr_iterator
        I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
        I != E; ) {
     MachineInstr *mi = &*(I++);
+
+    // For local split artifacts, we are interested only in instructions between
+    // the expected start and end of the range.
+    SlotIndex si = LIS.getInstructionIndex(*mi);
+    if (localSplitArtifact && ((si < *start) || (si > *end)))
+      continue;
+
     numInstr++;
     if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
       continue;
@@ -180,7 +263,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
     }
 
     // Get allocation hints from copies.
-    if (noHint || !mi->isCopy())
+    if (!mi->isCopy() ||
+        (TargetHint.first != 0 && !tri.enableMultipleCopyHints()))
       continue;
     unsigned hint = copyHint(mi, li.reg, tri, mri);
     if (!hint)
@@ -190,39 +274,43 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
     //
     // FIXME: we probably shouldn't use floats at all.
     volatile float hweight = Hint[hint] += weight;
-    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
-      if (hweight > bestPhys && mri.isAllocatable(hint)) {
-        bestPhys = hweight;
-        hintPhys = hint;
-      }
-    } else {
-      if (hweight > bestVirt) {
-        bestVirt = hweight;
-        hintVirt = hint;
-      }
-    }
+    if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
+      CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint),
+                     (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++)));
   }
 
   Hint.clear();
 
-  // Always prefer the physreg hint.
-  if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
-    mri.setRegAllocationHint(li.reg, 0, hint);
+  // Pass all the sorted copy hints to mri.
+  if (updateLI && CopyHints.size()) {
+    // Remove a generic hint if previously added by target.
+    if (TargetHint.first == 0 && TargetHint.second)
+      mri.clearSimpleHint(li.reg);
+
+    for (auto &Hint : CopyHints) {
+      if (TargetHint.first != 0 && Hint.Reg == TargetHint.second)
+        // Don't add again the target-type hint.
+        continue;
+      mri.addRegAllocationHint(li.reg, Hint.Reg);
+      if (!tri.enableMultipleCopyHints())
+        break;
+    }
+
     // Weakly boost the spill weight of hinted registers.
     totalWeight *= 1.01F;
   }
 
   // If the live interval was already unspillable, leave it that way.
   if (!Spillable)
-    return;
+    return -1.0;
 
   // Mark li as unspillable if all live ranges are tiny and the interval
   // is not live at any reg mask.  If the interval is live at a reg mask
   // spilling may be required.
-  if (li.isZeroLength(LIS.getSlotIndexes()) &&
+  if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) &&
       !li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
     li.markNotSpillable();
-    return;
+    return -1.0;
   }
 
   // If all of the definitions of the interval are re-materializable,
@@ -232,5 +320,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
   if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
     totalWeight *= 0.5F;
 
-  li.weight = normalize(totalWeight, li.getSize(), numInstr);
+  if (localSplitArtifact)
+    return normalize(totalWeight, start->distance(*end), numInstr);
+  return normalize(totalWeight, li.getSize(), numInstr);
 }
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 7cad4d031169..3593089b206d 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -15,14 +15,14 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 
 using namespace llvm;
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index b7fd45a3f6a6..c0d7eb4cf47b 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,16 +21,15 @@ using namespace llvm;
 /// initializeCodeGen - Initialize all passes linked into the CodeGen library.
 void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeAtomicExpandPass(Registry);
-  initializeBranchCoalescingPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeBranchRelaxationPass(Registry);
   initializeCodeGenPreparePass(Registry);
-  initializeCountingFunctionInserterPass(Registry);
   initializeDeadMachineInstructionElimPass(Registry);
   initializeDetectDeadLanesPass(Registry);
   initializeDwarfEHPreparePass(Registry);
   initializeEarlyIfConverterPass(Registry);
   initializeExpandISelPseudosPass(Registry);
+  initializeExpandMemCmpPassPass(Registry);
   initializeExpandPostRAPass(Registry);
   initializeFEntryInserterPass(Registry);
   initializeFinalizeMachineBundlesPass(Registry);
@@ -78,7 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
   initializeRABasicPass(Registry);
-  initializeRAFastPass(Registry);
+  initializeRegAllocFastPass(Registry);
   initializeRAGreedyPass(Registry);
   initializeRegisterCoalescerPass(Registry);
   initializeRenameIndependentSubregsPass(Registry);
@@ -100,6 +99,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeVirtRegRewriterPass(Registry);
   initializeWinEHPreparePass(Registry);
   initializeXRayInstrumentationPass(Registry);
+  initializeMIRCanonicalizerPass(Registry);
 }
 
 void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index dc02a00e0fcc..c4794380f791 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,13 +13,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -28,38 +32,69 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
-#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -75,6 +110,12 @@ STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                        "of sunken Casts");
 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                           "computations were sunk");
+STATISTIC(NumMemoryInstsPhiCreated,
+          "Number of phis created when address "
+          "computations were sunk to memory instructions");
+STATISTIC(NumMemoryInstsSelectCreated,
+          "Number of select created when address "
+          "computations were sunk to memory instructions");
 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
 STATISTIC(NumAndsAdded,
@@ -85,12 +126,6 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
 
-STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
-STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
-STATISTIC(NumMemCmpGreaterThanMax,
-          "Number of memcmp calls with size greater than max size");
-STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
-
 static cl::opt<bool> DisableBranchOpts(
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
   cl::desc("Disable branch optimizations in CodeGenPrepare"));
@@ -151,25 +186,51 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
     cl::desc("Enable merging of redundant sexts when one is dominating"
     " the other."), cl::init(true));
 
-static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
-    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
-    cl::desc("The number of loads per basic block for inline expansion of "
-             "memcmp that is only being compared against zero."));
+static cl::opt<bool> DisableComplexAddrModes(
+    "disable-complex-addr-modes", cl::Hidden, cl::init(false),
+    cl::desc("Disables combining addressing modes with different parts "
+             "in optimizeMemoryInst."));
+
+static cl::opt<bool>
+AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
+                cl::desc("Allow creation of Phis in Address sinking."));
+
+static cl::opt<bool>
+AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false),
+                   cl::desc("Allow creation of selects in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseReg(
+    "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
+    cl::desc("Allow combining of BaseReg field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseGV(
+    "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
+    cl::desc("Allow combining of BaseGV field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineBaseOffs(
+    "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
+    cl::desc("Allow combining of BaseOffs field in Address sinking."));
+
+static cl::opt<bool> AddrSinkCombineScaledReg(
+    "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
+    cl::desc("Allow combining of ScaledReg field in Address sinking."));
 
 namespace {
-typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
-typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
-typedef SmallVector<Instruction *, 16> SExts;
-typedef DenseMap<Value *, SExts> ValueToSExts;
+
+using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
+using TypeIsSExt = PointerIntPair<Type *, 1, bool>;
+using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
+using SExts = SmallVector<Instruction *, 16>;
+using ValueToSExts = DenseMap<Value *, SExts>;
+
 class TypePromotionTransaction;
 
   class CodeGenPrepare : public FunctionPass {
-    const TargetMachine *TM;
+    const TargetMachine *TM = nullptr;
     const TargetSubtargetInfo *SubtargetInfo;
-    const TargetLowering *TLI;
+    const TargetLowering *TLI = nullptr;
     const TargetRegisterInfo *TRI;
-    const TargetTransformInfo *TTI;
+    const TargetTransformInfo *TTI = nullptr;
     const TargetLibraryInfo *TLInfo;
     const LoopInfo *LI;
     std::unique_ptr<BlockFrequencyInfo> BFI;
@@ -181,11 +242,14 @@ class TypePromotionTransaction;
 
     /// Keeps track of non-local addresses that have been sunk into a block.
     /// This allows us to avoid inserting duplicate code for blocks with
-    /// multiple load/stores of the same address.
-    ValueMap<Value*, Value*> SunkAddrs;
+    /// multiple load/stores of the same address. The usage of WeakTrackingVH
+    /// enables SunkAddrs to be treated as a cache whose entries can be
+    /// invalidated if a sunken address computation has been erased.
+    ValueMap<Value*, WeakTrackingVH> SunkAddrs;
 
     /// Keeps track of all instructions inserted for the current function.
     SetOfInstrs InsertedInsts;
+
     /// Keeps track of the type of the related instruction before their
     /// promotion for the current function.
     InstrToOrigTy PromotedInsts;
@@ -206,15 +270,15 @@ class TypePromotionTransaction;
     bool OptSize;
 
     /// DataLayout for the Function being processed.
-    const DataLayout *DL;
+    const DataLayout *DL = nullptr;
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    CodeGenPrepare()
-        : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr),
-          DL(nullptr) {
+
+    CodeGenPrepare() : FunctionPass(ID) {
       initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
     }
+
     bool runOnFunction(Function &F) override;
 
     StringRef getPassName() const override { return "CodeGen Prepare"; }
@@ -264,11 +328,12 @@ class TypePromotionTransaction;
         SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
     bool splitBranchCondition(Function &F);
     bool simplifyOffsetableRelocate(Instruction &I);
-    bool splitIndirectCriticalEdges(Function &F);
   };
-}
+
+} // end anonymous namespace
 
 char CodeGenPrepare::ID = 0;
+
 INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
                       "Optimize for code generation", false, false)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
@@ -302,9 +367,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   OptSize = F.optForSize();
 
+  ProfileSummaryInfo *PSI =
+      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   if (ProfileGuidedSectionPrefix) {
-    ProfileSummaryInfo *PSI =
-        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
     if (PSI->isFunctionHotInCallGraph(&F))
       F.setSectionPrefix(".hot");
     else if (PSI->isFunctionColdInCallGraph(&F))
@@ -313,7 +378,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
 
   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
-  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
+      TLI->isSlowDivBypassed()) {
     const DenseMap<unsigned int, unsigned int> &BypassWidths =
        TLI->getBypassSlowDivWidths();
     BasicBlock* BB = &*F.begin();
@@ -340,7 +406,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
 
   // Split some critical edges where one of the sources is an indirect branch,
   // to help generate sane code for PHIs involving such edges.
-  EverMadeChange |= splitIndirectCriticalEdges(F);
+  EverMadeChange |= SplitIndirectBrCriticalEdges(F);
 
   bool MadeChange = true;
   while (MadeChange) {
@@ -485,160 +551,6 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
   return DestBB;
 }
 
-// Return the unique indirectbr predecessor of a block. This may return null
-// even if such a predecessor exists, if it's not useful for splitting.
-// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
-// predecessors of BB.
-static BasicBlock *
-findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
-  // If the block doesn't have any PHIs, we don't care about it, since there's
-  // no point in splitting it.
-  PHINode *PN = dyn_cast<PHINode>(BB->begin());
-  if (!PN)
-    return nullptr;
-
-  // Verify we have exactly one IBR predecessor.
-  // Conservatively bail out if one of the other predecessors is not a "regular"
-  // terminator (that is, not a switch or a br).
-  BasicBlock *IBB = nullptr;
-  for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
-    BasicBlock *PredBB = PN->getIncomingBlock(Pred);
-    TerminatorInst *PredTerm = PredBB->getTerminator();
-    switch (PredTerm->getOpcode()) {
-    case Instruction::IndirectBr:
-      if (IBB)
-        return nullptr;
-      IBB = PredBB;
-      break;
-    case Instruction::Br:
-    case Instruction::Switch:
-      OtherPreds.push_back(PredBB);
-      continue;
-    default:
-      return nullptr;
-    }
-  }
-
-  return IBB;
-}
-
-// Split critical edges where the source of the edge is an indirectbr
-// instruction. This isn't always possible, but we can handle some easy cases.
-// This is useful because MI is unable to split such critical edges,
-// which means it will not be able to sink instructions along those edges.
-// This is especially painful for indirect branches with many successors, where
-// we end up having to prepare all outgoing values in the origin block.
-//
-// Our normal algorithm for splitting critical edges requires us to update
-// the outgoing edges of the edge origin block, but for an indirectbr this
-// is hard, since it would require finding and updating the block addresses
-// the indirect branch uses. But if a block only has a single indirectbr
-// predecessor, with the others being regular branches, we can do it in a
-// different way.
-// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
-// We can split D into D0 and D1, where D0 contains only the PHIs from D,
-// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
-// create the following structure:
-// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
-bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
-  // Check whether the function has any indirectbrs, and collect which blocks
-  // they may jump to. Since most functions don't have indirect branches,
-  // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
-  SmallSetVector<BasicBlock *, 16> Targets;
-  for (auto &BB : F) {
-    auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
-    if (!IBI)
-      continue;
-
-    for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
-      Targets.insert(IBI->getSuccessor(Succ));
-  }
-
-  if (Targets.empty())
-    return false;
-
-  bool Changed = false;
-  for (BasicBlock *Target : Targets) {
-    SmallVector<BasicBlock *, 16> OtherPreds;
-    BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
-    // If we did not found an indirectbr, or the indirectbr is the only
-    // incoming edge, this isn't the kind of edge we're looking for.
-    if (!IBRPred || OtherPreds.empty())
-      continue;
-
-    // Don't even think about ehpads/landingpads.
-    Instruction *FirstNonPHI = Target->getFirstNonPHI();
-    if (FirstNonPHI->isEHPad() || Target->isLandingPad())
-      continue;
-
-    BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
-    // It's possible Target was its own successor through an indirectbr.
-    // In this case, the indirectbr now comes from BodyBlock.
-    if (IBRPred == Target)
-      IBRPred = BodyBlock;
-
-    // At this point Target only has PHIs, and BodyBlock has the rest of the
-    // block's body. Create a copy of Target that will be used by the "direct"
-    // preds.
-    ValueToValueMapTy VMap;
-    BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
-
-    for (BasicBlock *Pred : OtherPreds) {
-      // If the target is a loop to itself, then the terminator of the split
-      // block needs to be updated.
-      if (Pred == Target)
-        BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
-      else
-        Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
-    }
-
-    // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
-    // they are clones, so the number of PHIs are the same.
-    // (a) Remove the edge coming from IBRPred from the "Direct" PHI
-    // (b) Leave that as the only edge in the "Indirect" PHI.
-    // (c) Merge the two in the body block.
-    BasicBlock::iterator Indirect = Target->begin(),
-                         End = Target->getFirstNonPHI()->getIterator();
-    BasicBlock::iterator Direct = DirectSucc->begin();
-    BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
-
-    assert(&*End == Target->getTerminator() &&
-           "Block was expected to only contain PHIs");
-
-    while (Indirect != End) {
-      PHINode *DirPHI = cast<PHINode>(Direct);
-      PHINode *IndPHI = cast<PHINode>(Indirect);
-
-      // Now, clean up - the direct block shouldn't get the indirect value,
-      // and vice versa.
-      DirPHI->removeIncomingValue(IBRPred);
-      Direct++;
-
-      // Advance the pointer here, to avoid invalidation issues when the old
-      // PHI is erased.
-      Indirect++;
-
-      PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
-      NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
-                             IBRPred);
-
-      // Create a PHI in the body block, to merge the direct and indirect
-      // predecessors.
-      PHINode *MergePHI =
-          PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
-      MergePHI->addIncoming(NewIndPHI, Target);
-      MergePHI->addIncoming(DirPHI, DirectSucc);
-
-      IndPHI->replaceAllUsesWith(MergePHI);
-      IndPHI->eraseFromParent();
-    }
-
-    Changed = true;
-  }
-
-  return Changed;
-}
-
 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
 /// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -827,7 +739,6 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
   return true;
 }
 
-
 /// Eliminate a basic block that has only phi's and an unconditional branch in
 /// it.
 void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
@@ -948,6 +859,21 @@ static bool
 simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
                           const SmallVectorImpl<GCRelocateInst *> &Targets) {
   bool MadeChange = false;
+  // We must ensure the relocation of derived pointer is defined after
+  // relocation of base pointer. If we find a relocation corresponding to base
+  // defined earlier than relocation of base then we move relocation of base
+  // right before found relocation. We consider only relocation in the same
+  // basic block as relocation of base. Relocations from other basic block will
+  // be skipped by optimization and we do not care about them.
+  for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
+       &*R != RelocatedBase; ++R)
+    if (auto RI = dyn_cast<GCRelocateInst>(R))
+      if (RI->getStatepoint() == RelocatedBase->getStatepoint())
+        if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
+          RelocatedBase->moveBefore(RI);
+          break;
+        }
+
   for (GCRelocateInst *ToReplace : Targets) {
     assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
            "Not relocating a derived object of the original base object");
@@ -1125,6 +1051,7 @@ static bool SinkCast(CastInst *CI) {
 
   // If we removed all uses, nuke the cast.
   if (CI->use_empty()) {
+    salvageDebugInfo(*CI);
     CI->eraseFromParent();
     MadeChange = true;
   }
@@ -1137,7 +1064,6 @@ static bool SinkCast(CastInst *CI) {
 /// reduce the number of virtual registers that must be created and coalesced.
 ///
 /// Return true if any changes are made.
-///
 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
                                        const DataLayout &DL) {
   // Sink only "cheap" (or nop) address-space casts.  This is a weaker condition
@@ -1641,656 +1567,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
   return true;
 }
 
-// This class provides helper functions to expand a memcmp library call into an
-// inline expansion.
-class MemCmpExpansion {
-  struct ResultBlock {
-    BasicBlock *BB;
-    PHINode *PhiSrc1;
-    PHINode *PhiSrc2;
-    ResultBlock();
-  };
-
-  CallInst *CI;
-  ResultBlock ResBlock;
-  unsigned MaxLoadSize;
-  unsigned NumBlocks;
-  unsigned NumBlocksNonOneByte;
-  unsigned NumLoadsPerBlock;
-  std::vector<BasicBlock *> LoadCmpBlocks;
-  BasicBlock *EndBlock;
-  PHINode *PhiRes;
-  bool IsUsedForZeroCmp;
-  const DataLayout &DL;
-  IRBuilder<> Builder;
-
-  unsigned calculateNumBlocks(unsigned Size);
-  void createLoadCmpBlocks();
-  void createResultBlock();
-  void setupResultBlockPHINodes();
-  void setupEndBlockPHINodes();
-  void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
-                            unsigned GEPIndex);
-  Value *getCompareLoadPairs(unsigned Index, unsigned Size,
-                             unsigned &NumBytesProcessed);
-  void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
-                                         unsigned &NumBytesProcessed);
-  void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
-  void emitMemCmpResultBlock();
-  Value *getMemCmpExpansionZeroCase(unsigned Size);
-  Value *getMemCmpEqZeroOneBlock(unsigned Size);
-  Value *getMemCmpOneBlock(unsigned Size);
-  unsigned getLoadSize(unsigned Size);
-  unsigned getNumLoads(unsigned Size);
-
-public:
-  MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
-                  unsigned NumLoadsPerBlock, const DataLayout &DL);
-  Value *getMemCmpExpansion(uint64_t Size);
-};
-
-MemCmpExpansion::ResultBlock::ResultBlock()
-    : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {}
-
-// Initialize the basic block structure required for expansion of memcmp call
-// with given maximum load size and memcmp size parameter.
-// This structure includes:
-// 1. A list of load compare blocks - LoadCmpBlocks.
-// 2. An EndBlock, split from original instruction point, which is the block to
-// return from.
-// 3. ResultBlock, block to branch to for early exit when a
-// LoadCmpBlock finds a difference.
-MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
-                                 unsigned MaxLoadSize, unsigned LoadsPerBlock,
-                                 const DataLayout &TheDataLayout)
-    : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
-      DL(TheDataLayout), Builder(CI) {
-
-  // A memcmp with zero-comparison with only one block of load and compare does
-  // not need to set up any extra blocks. This case could be handled in the DAG,
-  // but since we have all of the machinery to flexibly expand any memcpy here,
-  // we choose to handle this case too to avoid fragmented lowering.
-  IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
-  NumBlocks = calculateNumBlocks(Size);
-  if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
-    BasicBlock *StartBlock = CI->getParent();
-    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
-    setupEndBlockPHINodes();
-    createResultBlock();
-
-    // If return value of memcmp is not used in a zero equality, we need to
-    // calculate which source was larger. The calculation requires the
-    // two loaded source values of each load compare block.
-    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
-    if (!IsUsedForZeroCmp)
-      setupResultBlockPHINodes();
-
-    // Create the number of required load compare basic blocks.
-    createLoadCmpBlocks();
-
-    // Update the terminator added by splitBasicBlock to branch to the first
-    // LoadCmpBlock.
-    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
-  }
-
-  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-}
-
-void MemCmpExpansion::createLoadCmpBlocks() {
-  for (unsigned i = 0; i < NumBlocks; i++) {
-    BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
-                                        EndBlock->getParent(), EndBlock);
-    LoadCmpBlocks.push_back(BB);
-  }
-}
-
-void MemCmpExpansion::createResultBlock() {
-  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
-                                   EndBlock->getParent(), EndBlock);
-}
-
-// This function creates the IR instructions for loading and comparing 1 byte.
-// It loads 1 byte from each source of the memcmp parameters with the given
-// GEPIndex. It then subtracts the two loaded values and adds this result to the
-// final phi node for selecting the memcmp result.
-void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
-                                               unsigned GEPIndex) {
-  Value *Source1 = CI->getArgOperand(0);
-  Value *Source2 = CI->getArgOperand(1);
-
-  Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-  Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
-  // Cast source to LoadSizeType*.
-  if (Source1->getType() != LoadSizeType)
-    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-  if (Source2->getType() != LoadSizeType)
-    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-  // Get the base address using the GEPIndex.
-  if (GEPIndex != 0) {
-    Source1 = Builder.CreateGEP(LoadSizeType, Source1,
-                                ConstantInt::get(LoadSizeType, GEPIndex));
-    Source2 = Builder.CreateGEP(LoadSizeType, Source2,
-                                ConstantInt::get(LoadSizeType, GEPIndex));
-  }
-
-  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-  LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
-  LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
-  Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
-
-  PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]);
-
-  if (Index < (LoadCmpBlocks.size() - 1)) {
-    // Early exit branch if difference found to EndBlock. Otherwise, continue to
-    // next LoadCmpBlock,
-    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
-                                    ConstantInt::get(Diff->getType(), 0));
-    BranchInst *CmpBr =
-        BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp);
-    Builder.Insert(CmpBr);
-  } else {
-    // The last block has an unconditional branch to EndBlock.
-    BranchInst *CmpBr = BranchInst::Create(EndBlock);
-    Builder.Insert(CmpBr);
-  }
-}
-
-unsigned MemCmpExpansion::getNumLoads(unsigned Size) {
-  return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize);
-}
-
-unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
-  return MinAlign(PowerOf2Floor(Size), MaxLoadSize);
-}
-
-/// Generate an equality comparison for one or more pairs of loaded values.
-/// This is used in the case where the memcmp() call is compared equal or not
-/// equal to zero.
-Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
-                                            unsigned &NumBytesProcessed) {
-  std::vector<Value *> XorList, OrList;
-  Value *Diff;
-
-  unsigned RemainingBytes = Size - NumBytesProcessed;
-  unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
-  unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
-
-  // For a single-block expansion, start inserting before the memcmp call.
-  if (LoadCmpBlocks.empty())
-    Builder.SetInsertPoint(CI);
-  else
-    Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-
-  Value *Cmp = nullptr;
-  for (unsigned i = 0; i < NumLoads; ++i) {
-    unsigned LoadSize = getLoadSize(RemainingBytes);
-    unsigned GEPIndex = NumBytesProcessed / LoadSize;
-    NumBytesProcessed += LoadSize;
-    RemainingBytes -= LoadSize;
-
-    Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
-    Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
-    assert(LoadSize <= MaxLoadSize && "Unexpected load type");
-
-    Value *Source1 = CI->getArgOperand(0);
-    Value *Source2 = CI->getArgOperand(1);
-
-    // Cast source to LoadSizeType*.
-    if (Source1->getType() != LoadSizeType)
-      Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-    if (Source2->getType() != LoadSizeType)
-      Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-    // Get the base address using the GEPIndex.
-    if (GEPIndex != 0) {
-      Source1 = Builder.CreateGEP(LoadSizeType, Source1,
-                                  ConstantInt::get(LoadSizeType, GEPIndex));
-      Source2 = Builder.CreateGEP(LoadSizeType, Source2,
-                                  ConstantInt::get(LoadSizeType, GEPIndex));
-    }
-
-    // Get a constant or load a value for each source address.
-    Value *LoadSrc1 = nullptr;
-    if (auto *Source1C = dyn_cast<Constant>(Source1))
-      LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
-    if (!LoadSrc1)
-      LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-
-    Value *LoadSrc2 = nullptr;
-    if (auto *Source2C = dyn_cast<Constant>(Source2))
-      LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
-    if (!LoadSrc2)
-      LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-    if (NumLoads != 1) {
-      if (LoadSizeType != MaxLoadType) {
-        LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
-        LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
-      }
-      // If we have multiple loads per block, we need to generate a composite
-      // comparison using xor+or.
-      Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
-      Diff = Builder.CreateZExt(Diff, MaxLoadType);
-      XorList.push_back(Diff);
-    } else {
-      // If there's only one load per block, we just compare the loaded values.
-      Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
-    }
-  }
-
-  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
-    std::vector<Value *> OutList;
-    for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
-      Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
-      OutList.push_back(Or);
-    }
-    if (InList.size() % 2 != 0)
-      OutList.push_back(InList.back());
-    return OutList;
-  };
-
-  if (!Cmp) {
-    // Pairwise OR the XOR results.
-    OrList = pairWiseOr(XorList);
-
-    // Pairwise OR the OR results until one result left.
-    while (OrList.size() != 1) {
-      OrList = pairWiseOr(OrList);
-    }
-    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
-  }
-
-  return Cmp;
-}
-
-void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
-    unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
-  Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
-
-  BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
-                           ? EndBlock
-                           : LoadCmpBlocks[Index + 1];
-  // Early exit branch if difference found to ResultBlock. Otherwise,
-  // continue to next LoadCmpBlock or EndBlock.
-  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
-  Builder.Insert(CmpBr);
-
-  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
-  // since early exit to ResultBlock was not taken (no difference was found in
-  // any of the bytes).
-  if (Index == LoadCmpBlocks.size() - 1) {
-    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
-    PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
-  }
-}
-
-// This function creates the IR intructions for loading and comparing using the
-// given LoadSize. It loads the number of bytes specified by LoadSize from each
-// source of the memcmp parameters. It then does a subtract to see if there was
-// a difference in the loaded values. If a difference is found, it branches
-// with an early exit to the ResultBlock for calculating which source was
-// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
-// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
-// a special case through emitLoadCompareByteBlock. The special handling can
-// simply subtract the loaded values and add it to the result phi node.
-void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
-                                           unsigned GEPIndex) {
-  if (LoadSize == 1) {
-    MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
-    return;
-  }
-
-  Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
-  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
-  assert(LoadSize <= MaxLoadSize && "Unexpected load type");
-
-  Value *Source1 = CI->getArgOperand(0);
-  Value *Source2 = CI->getArgOperand(1);
-
-  Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-  // Cast source to LoadSizeType*.
-  if (Source1->getType() != LoadSizeType)
-    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-  if (Source2->getType() != LoadSizeType)
-    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-  // Get the base address using the GEPIndex.
-  if (GEPIndex != 0) {
-    Source1 = Builder.CreateGEP(LoadSizeType, Source1,
-                                ConstantInt::get(LoadSizeType, GEPIndex));
-    Source2 = Builder.CreateGEP(LoadSizeType, Source2,
-                                ConstantInt::get(LoadSizeType, GEPIndex));
-  }
-
-  // Load LoadSizeType from the base address.
-  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-  if (DL.isLittleEndian()) {
-    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
-                                                Intrinsic::bswap, LoadSizeType);
-    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
-    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
-  }
-
-  if (LoadSizeType != MaxLoadType) {
-    LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
-    LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
-  }
-
-  // Add the loaded values to the phi nodes for calculating memcmp result only
-  // if result is not used in a zero equality.
-  if (!IsUsedForZeroCmp) {
-    ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]);
-    ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
-  }
-
-  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
-  BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
-                           ? EndBlock
-                           : LoadCmpBlocks[Index + 1];
-  // Early exit branch if difference found to ResultBlock. Otherwise, continue
-  // to next LoadCmpBlock or EndBlock.
-  BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
-  Builder.Insert(CmpBr);
-
-  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
-  // since early exit to ResultBlock was not taken (no difference was found in
-  // any of the bytes).
-  if (Index == LoadCmpBlocks.size() - 1) {
-    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
-    PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
-  }
-}
-
-// This function populates the ResultBlock with a sequence to calculate the
-// memcmp result. It compares the two loaded source values and returns -1 if
-// src1 < src2 and 1 if src1 > src2.
-void MemCmpExpansion::emitMemCmpResultBlock() {
-  // Special case: if memcmp result is used in a zero equality, result does not
-  // need to be calculated and can simply return 1.
-  if (IsUsedForZeroCmp) {
-    BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
-    Builder.SetInsertPoint(ResBlock.BB, InsertPt);
-    Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
-    PhiRes->addIncoming(Res, ResBlock.BB);
-    BranchInst *NewBr = BranchInst::Create(EndBlock);
-    Builder.Insert(NewBr);
-    return;
-  }
-  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
-  Builder.SetInsertPoint(ResBlock.BB, InsertPt);
-
-  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
-                                  ResBlock.PhiSrc2);
-
-  Value *Res =
-      Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
-                           ConstantInt::get(Builder.getInt32Ty(), 1));
-
-  BranchInst *NewBr = BranchInst::Create(EndBlock);
-  Builder.Insert(NewBr);
-  PhiRes->addIncoming(Res, ResBlock.BB);
-}
-
-unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
-  unsigned NumBlocks = 0;
-  bool HaveOneByteLoad = false;
-  unsigned RemainingSize = Size;
-  unsigned LoadSize = MaxLoadSize;
-  while (RemainingSize) {
-    if (LoadSize == 1)
-      HaveOneByteLoad = true;
-    NumBlocks += RemainingSize / LoadSize;
-    RemainingSize = RemainingSize % LoadSize;
-    LoadSize = LoadSize / 2;
-  }
-  NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
-
-  if (IsUsedForZeroCmp)
-    NumBlocks = NumBlocks / NumLoadsPerBlock +
-                (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0);
-
-  return NumBlocks;
-}
-
-void MemCmpExpansion::setupResultBlockPHINodes() {
-  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
-  Builder.SetInsertPoint(ResBlock.BB);
-  ResBlock.PhiSrc1 =
-      Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1");
-  ResBlock.PhiSrc2 =
-      Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2");
-}
-
-void MemCmpExpansion::setupEndBlockPHINodes() {
-  Builder.SetInsertPoint(&EndBlock->front());
-  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
-}
-
-Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
-  unsigned NumBytesProcessed = 0;
-  // This loop populates each of the LoadCmpBlocks with the IR sequence to
-  // handle multiple loads per block.
-  for (unsigned i = 0; i < NumBlocks; ++i)
-    emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed);
-
-  emitMemCmpResultBlock();
-  return PhiRes;
-}
-
-/// A memcmp expansion that compares equality with 0 and only has one block of
-/// load and compare can bypass the compare, branch, and phi IR that is required
-/// in the general case.
-Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
-  unsigned NumBytesProcessed = 0;
-  Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
-  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
-}
-
-/// A memcmp expansion that only has one block of load and compare can bypass
-/// the compare, branch, and phi IR that is required in the general case.
-Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
-  assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
-
-  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
-  Value *Source1 = CI->getArgOperand(0);
-  Value *Source2 = CI->getArgOperand(1);
-
-  // Cast source to LoadSizeType*.
-  if (Source1->getType() != LoadSizeType)
-    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-  if (Source2->getType() != LoadSizeType)
-    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-  // Load LoadSizeType from the base address.
-  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-  if (DL.isLittleEndian() && Size != 1) {
-    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
-                                                Intrinsic::bswap, LoadSizeType);
-    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
-    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
-  }
-
-  // TODO: Instead of comparing ULT, just subtract and return the difference?
-  Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
-  Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
-  Type *I32 = Builder.getInt32Ty();
-  Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
-                                             ConstantInt::get(I32, 1));
-  return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
-}
-
-// This function expands the memcmp call into an inline expansion and returns
-// the memcmp result.
-Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
-  if (IsUsedForZeroCmp)
-    return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
-                            getMemCmpExpansionZeroCase(Size);
-
-  // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
-  if (NumBlocks == 1 && NumLoadsPerBlock == 1)
-    return getMemCmpOneBlock(Size);
-
-  // This loop calls emitLoadCompareBlock for comparing Size bytes of the two
-  // memcmp sources. It starts with loading using the maximum load size set by
-  // the target. It processes any remaining bytes using a load size which is the
-  // next smallest power of 2.
-  unsigned LoadSize = MaxLoadSize;
-  unsigned NumBytesToBeProcessed = Size;
-  unsigned Index = 0;
-  while (NumBytesToBeProcessed) {
-    // Calculate how many blocks we can create with the current load size.
-    unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
-    unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
-    NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
-
-    // For each NumBlocks, populate the instruction sequence for loading and
-    // comparing LoadSize bytes.
-    while (NumBlocks--) {
-      emitLoadCompareBlock(Index, LoadSize, GEPIndex);
-      Index++;
-      GEPIndex++;
-    }
-    // Get the next LoadSize to use.
-    LoadSize = LoadSize / 2;
-  }
-
-  emitMemCmpResultBlock();
-  return PhiRes;
-}
-
-// This function checks to see if an expansion of memcmp can be generated.
-// It checks for constant compare size that is less than the max inline size.
-// If an expansion cannot occur, returns false to leave as a library call.
-// Otherwise, the library call is replaced with a new IR instruction sequence.
-/// We want to transform:
-/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
-/// To:
-/// loadbb:
-///  %0 = bitcast i32* %buffer2 to i8*
-///  %1 = bitcast i32* %buffer1 to i8*
-///  %2 = bitcast i8* %1 to i64*
-///  %3 = bitcast i8* %0 to i64*
-///  %4 = load i64, i64* %2
-///  %5 = load i64, i64* %3
-///  %6 = call i64 @llvm.bswap.i64(i64 %4)
-///  %7 = call i64 @llvm.bswap.i64(i64 %5)
-///  %8 = sub i64 %6, %7
-///  %9 = icmp ne i64 %8, 0
-///  br i1 %9, label %res_block, label %loadbb1
-/// res_block:                                        ; preds = %loadbb2,
-/// %loadbb1, %loadbb
-///  %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
-///  %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
-///  %10 = icmp ult i64 %phi.src1, %phi.src2
-///  %11 = select i1 %10, i32 -1, i32 1
-///  br label %endblock
-/// loadbb1:                                          ; preds = %loadbb
-///  %12 = bitcast i32* %buffer2 to i8*
-///  %13 = bitcast i32* %buffer1 to i8*
-///  %14 = bitcast i8* %13 to i32*
-///  %15 = bitcast i8* %12 to i32*
-///  %16 = getelementptr i32, i32* %14, i32 2
-///  %17 = getelementptr i32, i32* %15, i32 2
-///  %18 = load i32, i32* %16
-///  %19 = load i32, i32* %17
-///  %20 = call i32 @llvm.bswap.i32(i32 %18)
-///  %21 = call i32 @llvm.bswap.i32(i32 %19)
-///  %22 = zext i32 %20 to i64
-///  %23 = zext i32 %21 to i64
-///  %24 = sub i64 %22, %23
-///  %25 = icmp ne i64 %24, 0
-///  br i1 %25, label %res_block, label %loadbb2
-/// loadbb2:                                          ; preds = %loadbb1
-///  %26 = bitcast i32* %buffer2 to i8*
-///  %27 = bitcast i32* %buffer1 to i8*
-///  %28 = bitcast i8* %27 to i16*
-///  %29 = bitcast i8* %26 to i16*
-///  %30 = getelementptr i16, i16* %28, i16 6
-///  %31 = getelementptr i16, i16* %29, i16 6
-///  %32 = load i16, i16* %30
-///  %33 = load i16, i16* %31
-///  %34 = call i16 @llvm.bswap.i16(i16 %32)
-///  %35 = call i16 @llvm.bswap.i16(i16 %33)
-///  %36 = zext i16 %34 to i64
-///  %37 = zext i16 %35 to i64
-///  %38 = sub i64 %36, %37
-///  %39 = icmp ne i64 %38, 0
-///  br i1 %39, label %res_block, label %loadbb3
-/// loadbb3:                                          ; preds = %loadbb2
-///  %40 = bitcast i32* %buffer2 to i8*
-///  %41 = bitcast i32* %buffer1 to i8*
-///  %42 = getelementptr i8, i8* %41, i8 14
-///  %43 = getelementptr i8, i8* %40, i8 14
-///  %44 = load i8, i8* %42
-///  %45 = load i8, i8* %43
-///  %46 = zext i8 %44 to i32
-///  %47 = zext i8 %45 to i32
-///  %48 = sub i32 %46, %47
-///  br label %endblock
-/// endblock:                                         ; preds = %res_block,
-/// %loadbb3
-///  %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
-///  ret i32 %phi.res
-static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
-                         const TargetLowering *TLI, const DataLayout *DL) {
-  NumMemCmpCalls++;
-
-  // TTI call to check if target would like to expand memcmp. Also, get the
-  // MaxLoadSize.
-  unsigned MaxLoadSize;
-  if (!TTI->expandMemCmp(CI, MaxLoadSize))
-    return false;
-
-  // Early exit from expansion if -Oz.
-  if (CI->getFunction()->optForMinSize())
-    return false;
-
-  // Early exit from expansion if size is not a constant.
-  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  if (!SizeCast) {
-    NumMemCmpNotConstant++;
-    return false;
-  }
-
-  // Early exit from expansion if size greater than max bytes to load.
-  uint64_t SizeVal = SizeCast->getZExtValue();
-  unsigned NumLoads = 0;
-  unsigned RemainingSize = SizeVal;
-  unsigned LoadSize = MaxLoadSize;
-  while (RemainingSize) {
-    NumLoads += RemainingSize / LoadSize;
-    RemainingSize = RemainingSize % LoadSize;
-    LoadSize = LoadSize / 2;
-  }
-
-  if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
-    NumMemCmpGreaterThanMax++;
-    return false;
-  }
-
-  NumMemCmpInlined++;
-
-  // MemCmpHelper object creates and sets up basic blocks required for
-  // expanding memcmp with size SizeVal.
-  unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock;
-  MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL);
-
-  Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal);
-
-  // Replace call with result of expansion and erase call.
-  CI->replaceAllUsesWith(Res);
-  CI->eraseFromParent();
-
-  return true;
-}
-
 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
   BasicBlock *BB = CI->getParent();
 
@@ -2443,12 +1719,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
     return true;
   }
 
-  LibFunc Func;
-  if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) &&
-      Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) {
-    ModifiedDT = true;
-    return true;
-  }
   return false;
 }
 
@@ -2599,19 +1869,125 @@ namespace {
 /// This is an extended version of TargetLowering::AddrMode
 /// which holds actual Value*'s for register values.
 struct ExtAddrMode : public TargetLowering::AddrMode {
-  Value *BaseReg;
-  Value *ScaledReg;
-  ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
+  Value *BaseReg = nullptr;
+  Value *ScaledReg = nullptr;
+  Value *OriginalValue = nullptr;
+
+  enum FieldName {
+    NoField        = 0x00,
+    BaseRegField   = 0x01,
+    BaseGVField    = 0x02,
+    BaseOffsField  = 0x04,
+    ScaledRegField = 0x08,
+    ScaleField     = 0x10,
+    MultipleFields = 0xff
+  };
+
+  ExtAddrMode() = default;
+
   void print(raw_ostream &OS) const;
   void dump() const;
 
-  bool operator==(const ExtAddrMode& O) const {
-    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
-           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
-           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+  FieldName compare(const ExtAddrMode &other) {
+    // First check that the types are the same on each field, as differing types
+    // is something we can't cope with later on.
+    if (BaseReg && other.BaseReg &&
+        BaseReg->getType() != other.BaseReg->getType())
+      return MultipleFields;
+    if (BaseGV && other.BaseGV &&
+        BaseGV->getType() != other.BaseGV->getType())
+      return MultipleFields;
+    if (ScaledReg && other.ScaledReg &&
+        ScaledReg->getType() != other.ScaledReg->getType())
+      return MultipleFields;
+
+    // Check each field to see if it differs.
+    unsigned Result = NoField;
+    if (BaseReg != other.BaseReg)
+      Result |= BaseRegField;
+    if (BaseGV != other.BaseGV)
+      Result |= BaseGVField;
+    if (BaseOffs != other.BaseOffs)
+      Result |= BaseOffsField;
+    if (ScaledReg != other.ScaledReg)
+      Result |= ScaledRegField;
+    // Don't count 0 as being a different scale, because that actually means
+    // unscaled (which will already be counted by having no ScaledReg).
+    if (Scale && other.Scale && Scale != other.Scale)
+      Result |= ScaleField;
+
+    if (countPopulation(Result) > 1)
+      return MultipleFields;
+    else
+      return static_cast<FieldName>(Result);
+  }
+
+  // An AddrMode is trivial if it involves no calculation i.e. it is just a base
+  // with no offset.
+  bool isTrivial() {
+    // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
+    // trivial if at most one of these terms is nonzero, except that BaseGV and
+    // BaseReg both being zero actually means a null pointer value, which we
+    // consider to be 'non-zero' here.
+    return !BaseOffs && !Scale && !(BaseGV && BaseReg);
+  }
+
+  Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
+    switch (Field) {
+    default:
+      return nullptr;
+    case BaseRegField:
+      return BaseReg;
+    case BaseGVField:
+      return BaseGV;
+    case ScaledRegField:
+      return ScaledReg;
+    case BaseOffsField:
+      return ConstantInt::get(IntPtrTy, BaseOffs);
+    }
+  }
+
+  void SetCombinedField(FieldName Field, Value *V,
+                        const SmallVectorImpl<ExtAddrMode> &AddrModes) {
+    switch (Field) {
+    default:
+      llvm_unreachable("Unhandled fields are expected to be rejected earlier");
+      break;
+    case ExtAddrMode::BaseRegField:
+      BaseReg = V;
+      break;
+    case ExtAddrMode::BaseGVField:
+      // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
+      // in the BaseReg field.
+      assert(BaseReg == nullptr);
+      BaseReg = V;
+      BaseGV = nullptr;
+      break;
+    case ExtAddrMode::ScaledRegField:
+      ScaledReg = V;
+      // If we have a mix of scaled and unscaled addrmodes then we want scale
+      // to be the scale and not zero.
+      if (!Scale)
+        for (const ExtAddrMode &AM : AddrModes)
+          if (AM.Scale) {
+            Scale = AM.Scale;
+            break;
+          }
+      break;
+    case ExtAddrMode::BaseOffsField:
+      // The offset is no longer a constant, so it goes in ScaledReg with a
+      // scale of 1.
+      assert(ScaledReg == nullptr);
+      ScaledReg = V;
+      Scale = 1;
+      BaseOffs = 0;
+      break;
+    }
   }
 };
 
+} // end anonymous namespace
+
 #ifndef NDEBUG
 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
   AM.print(OS);
@@ -2619,6 +1995,7 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
 }
 #endif
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void ExtAddrMode::print(raw_ostream &OS) const {
   bool NeedPlus = false;
   OS << "[";
@@ -2650,18 +2027,18 @@ void ExtAddrMode::print(raw_ostream &OS) const {
   OS << ']';
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
   print(dbgs());
   dbgs() << '\n';
 }
 #endif
 
+namespace {
+
 /// \brief This class provides transaction based operation on the IR.
 /// Every change made through this class is recorded in the internal state and
 /// can be undone (rollback) until commit is called.
 class TypePromotionTransaction {
-
   /// \brief This represents the common interface of the individual transaction.
   /// Each class implements the logic for doing one specific modification on
   /// the IR via the TypePromotionTransaction.
@@ -2675,7 +2052,7 @@ class TypePromotionTransaction {
     /// The constructor performs the related action on the IR.
     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
 
-    virtual ~TypePromotionAction() {}
+    virtual ~TypePromotionAction() = default;
 
     /// \brief Undo the modification done by this action.
     /// When this method is called, the IR must be in the same state as it was
@@ -2702,6 +2079,7 @@ class TypePromotionTransaction {
       Instruction *PrevInst;
       BasicBlock *BB;
     } Point;
+
     /// Remember whether or not the instruction had a previous instruction.
     bool HasPrevInstruction;
 
@@ -2756,6 +2134,7 @@ class TypePromotionTransaction {
   class OperandSetter : public TypePromotionAction {
     /// Original operand of the instruction.
     Value *Origin;
+
     /// Index of the modified instruction.
     unsigned Idx;
 
@@ -2813,6 +2192,7 @@ class TypePromotionTransaction {
   /// \brief Build a truncate instruction.
   class TruncBuilder : public TypePromotionAction {
     Value *Val;
+
   public:
     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
     /// result.
@@ -2837,6 +2217,7 @@ class TypePromotionTransaction {
   /// \brief Build a sign extension instruction.
   class SExtBuilder : public TypePromotionAction {
     Value *Val;
+
   public:
     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
     /// result.
@@ -2862,6 +2243,7 @@ class TypePromotionTransaction {
   /// \brief Build a zero extension instruction.
   class ZExtBuilder : public TypePromotionAction {
     Value *Val;
+
   public:
     /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
     /// result.
@@ -2912,15 +2294,18 @@ class TypePromotionTransaction {
     struct InstructionAndIdx {
       /// The instruction using the instruction.
       Instruction *Inst;
+
       /// The index where this instruction is used for Inst.
       unsigned Idx;
+
       InstructionAndIdx(Instruction *Inst, unsigned Idx)
           : Inst(Inst), Idx(Idx) {}
     };
 
     /// Keep track of the original uses (pair Instruction, Index).
     SmallVector<InstructionAndIdx, 4> OriginalUses;
-    typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
+
+    using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
 
   public:
     /// \brief Replace all the use of \p Inst by \p New.
@@ -2951,11 +2336,14 @@ class TypePromotionTransaction {
   class InstructionRemover : public TypePromotionAction {
     /// Original position of the instruction.
     InsertionHandler Inserter;
+
     /// Helper structure to hide all the link to the instruction. In other
     /// words, this helps to do as if the instruction was removed.
     OperandsHider Hider;
+
     /// Keep track of the uses replaced, if any.
-    UsesReplacer *Replacer;
+    UsesReplacer *Replacer = nullptr;
+
     /// Keep track of instructions removed.
     SetOfInstrs &RemovedInsts;
 
@@ -2967,7 +2355,7 @@ class TypePromotionTransaction {
     InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
                        Value *New = nullptr)
         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
-          Replacer(nullptr), RemovedInsts(RemovedInsts) {
+          RemovedInsts(RemovedInsts) {
       if (New)
         Replacer = new UsesReplacer(Inst, New);
       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
@@ -2996,15 +2384,17 @@ public:
   /// Restoration point.
   /// The restoration point is a pointer to an action instead of an iterator
   /// because the iterator may be invalidated but not the pointer.
-  typedef const TypePromotionAction *ConstRestorationPt;
+  using ConstRestorationPt = const TypePromotionAction *;
 
   TypePromotionTransaction(SetOfInstrs &RemovedInsts)
       : RemovedInsts(RemovedInsts) {}
 
   /// Advocate every changes made in that transaction.
   void commit();
+
   /// Undo all the changes made after the given point.
   void rollback(ConstRestorationPt Point);
+
   /// Get the current restoration point.
   ConstRestorationPt getRestorationPoint() const;
 
@@ -3012,18 +2402,25 @@ public:
   /// @{
   /// Same as Instruction::setOperand.
   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
+
   /// Same as Instruction::eraseFromParent.
   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
+
   /// Same as Value::replaceAllUsesWith.
   void replaceAllUsesWith(Instruction *Inst, Value *New);
+
   /// Same as Value::mutateType.
   void mutateType(Instruction *Inst, Type *NewTy);
+
   /// Same as IRBuilder::createTrunc.
   Value *createTrunc(Instruction *Opnd, Type *Ty);
+
   /// Same as IRBuilder::createSExt.
   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+
   /// Same as IRBuilder::createZExt.
   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
+
   /// Same as Instruction::moveBefore.
   void moveBefore(Instruction *Inst, Instruction *Before);
   /// @}
@@ -3031,30 +2428,36 @@ public:
 private:
   /// The ordered list of actions made so far.
   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
-  typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+
+  using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
+
   SetOfInstrs &RemovedInsts;
 };
 
+} // end anonymous namespace
+
 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
                                           Value *NewVal) {
-  Actions.push_back(
-      make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
+  Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
+      Inst, Idx, NewVal));
 }
 
 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                 Value *NewVal) {
   Actions.push_back(
-      make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
-                                                         RemovedInsts, NewVal));
+      llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
+          Inst, RemovedInsts, NewVal));
 }
 
 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
                                                   Value *New) {
-  Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
+  Actions.push_back(
+      llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
 }
 
 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
-  Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
+  Actions.push_back(
+      llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
 }
 
 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
@@ -3084,7 +2487,8 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst,
 void TypePromotionTransaction::moveBefore(Instruction *Inst,
                                           Instruction *Before) {
   Actions.push_back(
-      make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
+      llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
+          Inst, Before));
 }
 
 TypePromotionTransaction::ConstRestorationPt
@@ -3107,6 +2511,8 @@ void TypePromotionTransaction::rollback(
   }
 }
 
+namespace {
+
 /// \brief A helper class for matching addressing modes.
 ///
 /// This encapsulates the logic for matching the target-legal addressing modes.
@@ -3128,8 +2534,10 @@ class AddressingModeMatcher {
 
   /// The instructions inserted by other CodeGenPrepare optimizations.
   const SetOfInstrs &InsertedInsts;
+
   /// A map from the instructions to their type before promotion.
   InstrToOrigTy &PromotedInsts;
+
   /// The ongoing transaction where every action should be registered.
   TypePromotionTransaction &TPT;
 
@@ -3151,8 +2559,8 @@ class AddressingModeMatcher {
         PromotedInsts(PromotedInsts), TPT(TPT) {
     IgnoreProfitability = false;
   }
-public:
 
+public:
   /// Find the maximal addressing mode that a load/store of V can fold,
   /// give an access type of AccessTy.  This returns a list of involved
   /// instructions in AddrModeInsts.
@@ -3177,6 +2585,7 @@ public:
     (void)Success; assert(Success && "Couldn't select *anything*?");
     return Result;
   }
+
 private:
   bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
   bool matchAddr(Value *V, unsigned Depth);
@@ -3190,6 +2599,520 @@ private:
                              Value *PromotedOperand) const;
 };
 
+/// \brief Keep track of simplification of Phi nodes.
+/// Accept the set of all phi nodes and erase phi node from this set
+/// if it is simplified.
+class SimplificationTracker {
+  DenseMap<Value *, Value *> Storage;
+  const SimplifyQuery &SQ;
+  SmallPtrSetImpl<PHINode *> &AllPhiNodes;
+  SmallPtrSetImpl<SelectInst *> &AllSelectNodes;
+
+public:
+  SimplificationTracker(const SimplifyQuery &sq,
+                        SmallPtrSetImpl<PHINode *> &APN,
+                        SmallPtrSetImpl<SelectInst *> &ASN)
+      : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {}
+
+  Value *Get(Value *V) {
+    do {
+      auto SV = Storage.find(V);
+      if (SV == Storage.end())
+        return V;
+      V = SV->second;
+    } while (true);
+  }
+
+  Value *Simplify(Value *Val) {
+    SmallVector<Value *, 32> WorkList;
+    SmallPtrSet<Value *, 32> Visited;
+    WorkList.push_back(Val);
+    while (!WorkList.empty()) {
+      auto P = WorkList.pop_back_val();
+      if (!Visited.insert(P).second)
+        continue;
+      if (auto *PI = dyn_cast<Instruction>(P))
+        if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
+          for (auto *U : PI->users())
+            WorkList.push_back(cast<Value>(U));
+          Put(PI, V);
+          PI->replaceAllUsesWith(V);
+          if (auto *PHI = dyn_cast<PHINode>(PI))
+            AllPhiNodes.erase(PHI);
+          if (auto *Select = dyn_cast<SelectInst>(PI))
+            AllSelectNodes.erase(Select);
+          PI->eraseFromParent();
+        }
+    }
+    return Get(Val);
+  }
+
+  void Put(Value *From, Value *To) {
+    Storage.insert({ From, To });
+  }
+};
+
+/// \brief A helper class for combining addressing modes.
+class AddressingModeCombiner {
+  typedef std::pair<Value *, BasicBlock *> ValueInBB;
+  typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
+  typedef std::pair<PHINode *, PHINode *> PHIPair;
+
+private:
+  /// The addressing modes we've collected.
+  SmallVector<ExtAddrMode, 16> AddrModes;
+
+  /// The field in which the AddrModes differ, when we have more than one.
+  ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
+
+  /// Are the AddrModes that we have all just equal to their original values?
+  bool AllAddrModesTrivial = true;
+
+  /// Common Type for all different fields in addressing modes.
+  Type *CommonType;
+
+  /// SimplifyQuery for simplifyInstruction utility.
+  const SimplifyQuery &SQ;
+
+  /// Original Address.
+  ValueInBB Original;
+
+public:
+  AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
+      : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
+
+  /// \brief Get the combined AddrMode
+  const ExtAddrMode &getAddrMode() const {
+    return AddrModes[0];
+  }
+
+  /// \brief Add a new AddrMode if it's compatible with the AddrModes we already
+  /// have.
+  /// \return True iff we succeeded in doing so.
+  bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
+    // Take note of if we have any non-trivial AddrModes, as we need to detect
+    // when all AddrModes are trivial as then we would introduce a phi or select
+    // which just duplicates what's already there.
+    AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
+
+    // If this is the first addrmode then everything is fine.
+    if (AddrModes.empty()) {
+      AddrModes.emplace_back(NewAddrMode);
+      return true;
+    }
+
+    // Figure out how different this is from the other address modes, which we
+    // can do just by comparing against the first one given that we only care
+    // about the cumulative difference.
+    ExtAddrMode::FieldName ThisDifferentField =
+      AddrModes[0].compare(NewAddrMode);
+    if (DifferentField == ExtAddrMode::NoField)
+      DifferentField = ThisDifferentField;
+    else if (DifferentField != ThisDifferentField)
+      DifferentField = ExtAddrMode::MultipleFields;
+
+    // If NewAddrMode differs in only one dimension, and that dimension isn't
+    // the amount that ScaledReg is scaled by, then we can handle it by
+    // inserting a phi/select later on. Even if NewAddMode is the same
+    // we still need to collect it due to original value is different.
+    // And later we will need all original values as anchors during
+    // finding the common Phi node.
+    if (DifferentField != ExtAddrMode::MultipleFields &&
+        DifferentField != ExtAddrMode::ScaleField) {
+      AddrModes.emplace_back(NewAddrMode);
+      return true;
+    }
+
+    // We couldn't combine NewAddrMode with the rest, so return failure.
+    AddrModes.clear();
+    return false;
+  }
+
+  /// \brief Combine the addressing modes we've collected into a single
+  /// addressing mode.
+  /// \return True iff we successfully combined them or we only had one so
+  /// didn't need to combine them anyway.
+  bool combineAddrModes() {
+    // If we have no AddrModes then they can't be combined.
+    if (AddrModes.size() == 0)
+      return false;
+
+    // A single AddrMode can trivially be combined.
+    if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
+      return true;
+
+    // If the AddrModes we collected are all just equal to the value they are
+    // derived from then combining them wouldn't do anything useful.
+    if (AllAddrModesTrivial)
+      return false;
+
+    if (!addrModeCombiningAllowed())
+      return false;
+
+    // Build a map between <original value, basic block where we saw it> to
+    // value of base register.
+    // Bail out if there is no common type.
+    FoldAddrToValueMapping Map;
+    if (!initializeMap(Map))
+      return false;
+
+    Value *CommonValue = findCommon(Map);
+    if (CommonValue)
+      AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
+    return CommonValue != nullptr;
+  }
+
+private:
+  /// \brief Initialize Map with anchor values. For address seen in some BB
+  /// we set the value of different field saw in this address.
+  /// If address is not an instruction than basic block is set to null.
+  /// At the same time we find a common type for different field we will
+  /// use to create new Phi/Select nodes. Keep it in CommonType field.
+  /// Return false if there is no common type found.
+  bool initializeMap(FoldAddrToValueMapping &Map) {
+    // Keep track of keys where the value is null. We will need to replace it
+    // with constant null when we know the common type.
+    SmallVector<ValueInBB, 2> NullValue;
+    Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
+    for (auto &AM : AddrModes) {
+      BasicBlock *BB = nullptr;
+      if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue))
+        BB = I->getParent();
+
+      Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
+      if (DV) {
+        auto *Type = DV->getType();
+        if (CommonType && CommonType != Type)
+          return false;
+        CommonType = Type;
+        Map[{ AM.OriginalValue, BB }] = DV;
+      } else {
+        NullValue.push_back({ AM.OriginalValue, BB });
+      }
+    }
+    assert(CommonType && "At least one non-null value must be!");
+    for (auto VIBB : NullValue)
+      Map[VIBB] = Constant::getNullValue(CommonType);
+    return true;
+  }
+
+  /// \brief We have mapping between value A and basic block where value A
+  /// seen to other value B where B was a field in addressing mode represented
+  /// by A. Also we have an original value C representin an address in some
+  /// basic block. Traversing from C through phi and selects we ended up with
+  /// A's in a map. This utility function tries to find a value V which is a
+  /// field in addressing mode C and traversing through phi nodes and selects
+  /// we will end up in corresponded values B in a map.
+  /// The utility will create a new Phi/Selects if needed.
+  // The simple example looks as follows:
+  // BB1:
+  //   p1 = b1 + 40
+  //   br cond BB2, BB3
+  // BB2:
+  //   p2 = b2 + 40
+  //   br BB3
+  // BB3:
+  //   p = phi [p1, BB1], [p2, BB2]
+  //   v = load p
+  // Map is
+  //   <p1, BB1> -> b1
+  //   <p2, BB2> -> b2
+  // Request is
+  //   <p, BB3> -> ?
+  // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
+  Value *findCommon(FoldAddrToValueMapping &Map) {
+    // Tracks of new created Phi nodes.
+    SmallPtrSet<PHINode *, 32> NewPhiNodes;
+    // Tracks of new created Select nodes.
+    SmallPtrSet<SelectInst *, 32> NewSelectNodes;
+    // Tracks the simplification of new created phi nodes. The reason we use
+    // this mapping is because we will add new created Phi nodes in AddrToBase.
+    // Simplification of Phi nodes is recursive, so some Phi node may
+    // be simplified after we added it to AddrToBase.
+    // Using this mapping we can find the current value in AddrToBase.
+    SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes);
+
+    // First step, DFS to create PHI nodes for all intermediate blocks.
+    // Also fill traverse order for the second step.
+    SmallVector<ValueInBB, 32> TraverseOrder;
+    InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes);
+
+    // Second Step, fill new nodes by merged values and simplify if possible.
+    FillPlaceholders(Map, TraverseOrder, ST);
+
+    if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) {
+      DestroyNodes(NewPhiNodes);
+      DestroyNodes(NewSelectNodes);
+      return nullptr;
+    }
+
+    // Now we'd like to match New Phi nodes to existed ones.
+    unsigned PhiNotMatchedCount = 0;
+    if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
+      DestroyNodes(NewPhiNodes);
+      DestroyNodes(NewSelectNodes);
+      return nullptr;
+    }
+
+    auto *Result = ST.Get(Map.find(Original)->second);
+    if (Result) {
+      NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount;
+      NumMemoryInstsSelectCreated += NewSelectNodes.size();
+    }
+    return Result;
+  }
+
+  /// \brief Destroy nodes from a set.
+  template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) {
+    // For safe erasing, replace the Phi with dummy value first.
+    auto Dummy = UndefValue::get(CommonType);
+    for (auto I : Instructions) {
+      I->replaceAllUsesWith(Dummy);
+      I->eraseFromParent();
+    }
+  }
+
+  /// \brief Try to match PHI node to Candidate.
+  /// Matcher tracks the matched Phi nodes.
+  bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
+                    DenseSet<PHIPair> &Matcher,
+                    SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) {
+    SmallVector<PHIPair, 8> WorkList;
+    Matcher.insert({ PHI, Candidate });
+    WorkList.push_back({ PHI, Candidate });
+    SmallSet<PHIPair, 8> Visited;
+    while (!WorkList.empty()) {
+      auto Item = WorkList.pop_back_val();
+      if (!Visited.insert(Item).second)
+        continue;
+      // We iterate over all incoming values to Phi to compare them.
+      // If values are different and both of them Phi and the first one is a
+      // Phi we added (subject to match) and both of them is in the same basic
+      // block then we can match our pair if values match. So we state that
+      // these values match and add it to work list to verify that.
+      for (auto B : Item.first->blocks()) {
+        Value *FirstValue = Item.first->getIncomingValueForBlock(B);
+        Value *SecondValue = Item.second->getIncomingValueForBlock(B);
+        if (FirstValue == SecondValue)
+          continue;
+
+        PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
+        PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
+
+        // One of them is not Phi or
+        // The first one is not Phi node from the set we'd like to match or
+        // Phi nodes from different basic blocks then
+        // we will not be able to match.
+        if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
+            FirstPhi->getParent() != SecondPhi->getParent())
+          return false;
+
+        // If we already matched them then continue.
+        if (Matcher.count({ FirstPhi, SecondPhi }))
+          continue;
+        // So the values are different and does not match. So we need them to
+        // match.
+        Matcher.insert({ FirstPhi, SecondPhi });
+        // But me must check it.
+        WorkList.push_back({ FirstPhi, SecondPhi });
+      }
+    }
+    return true;
+  }
+
+  /// \brief For the given set of PHI nodes try to find their equivalents.
+  /// Returns false if this matching fails and creation of new Phi is disabled.
+  bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch,
+                   SimplificationTracker &ST, bool AllowNewPhiNodes,
+                   unsigned &PhiNotMatchedCount) {
+    DenseSet<PHIPair> Matched;
+    SmallPtrSet<PHINode *, 8> WillNotMatch;
+    while (PhiNodesToMatch.size()) {
+      PHINode *PHI = *PhiNodesToMatch.begin();
+
+      // Add us, if no Phi nodes in the basic block we do not match.
+      WillNotMatch.clear();
+      WillNotMatch.insert(PHI);
+
+      // Traverse all Phis until we found equivalent or fail to do that.
+      bool IsMatched = false;
+      for (auto &P : PHI->getParent()->phis()) {
+        if (&P == PHI)
+          continue;
+        if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
+          break;
+        // If it does not match, collect all Phi nodes from matcher.
+        // if we end up with no match, them all these Phi nodes will not match
+        // later.
+        for (auto M : Matched)
+          WillNotMatch.insert(M.first);
+        Matched.clear();
+      }
+      if (IsMatched) {
+        // Replace all matched values and erase them.
+        for (auto MV : Matched) {
+          MV.first->replaceAllUsesWith(MV.second);
+          PhiNodesToMatch.erase(MV.first);
+          ST.Put(MV.first, MV.second);
+          MV.first->eraseFromParent();
+        }
+        Matched.clear();
+        continue;
+      }
+      // If we are not allowed to create new nodes then bail out.
+      if (!AllowNewPhiNodes)
+        return false;
+      // Just remove all seen values in matcher. They will not match anything.
+      PhiNotMatchedCount += WillNotMatch.size();
+      for (auto *P : WillNotMatch)
+        PhiNodesToMatch.erase(P);
+    }
+    return true;
+  }
+  /// \brief Fill the placeholder with values from predecessors and simplify it.
+  void FillPlaceholders(FoldAddrToValueMapping &Map,
+                        SmallVectorImpl<ValueInBB> &TraverseOrder,
+                        SimplificationTracker &ST) {
+    while (!TraverseOrder.empty()) {
+      auto Current = TraverseOrder.pop_back_val();
+      assert(Map.find(Current) != Map.end() && "No node to fill!!!");
+      Value *CurrentValue = Current.first;
+      BasicBlock *CurrentBlock = Current.second;
+      Value *V = Map[Current];
+
+      if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
+        // CurrentValue also must be Select.
+        auto *CurrentSelect = cast<SelectInst>(CurrentValue);
+        auto *TrueValue = CurrentSelect->getTrueValue();
+        ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue)
+                                              ? CurrentBlock
+                                              : nullptr };
+        assert(Map.find(TrueItem) != Map.end() && "No True Value!");
+        Select->setTrueValue(ST.Get(Map[TrueItem]));
+        auto *FalseValue = CurrentSelect->getFalseValue();
+        ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue)
+                                                ? CurrentBlock
+                                                : nullptr };
+        assert(Map.find(FalseItem) != Map.end() && "No False Value!");
+        Select->setFalseValue(ST.Get(Map[FalseItem]));
+      } else {
+        // Must be a Phi node then.
+        PHINode *PHI = cast<PHINode>(V);
+        // Fill the Phi node with values from predecessors.
+        bool IsDefinedInThisBB =
+            cast<Instruction>(CurrentValue)->getParent() == CurrentBlock;
+        auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue);
+        for (auto B : predecessors(CurrentBlock)) {
+          Value *PV = IsDefinedInThisBB
+                          ? CurrentPhi->getIncomingValueForBlock(B)
+                          : CurrentValue;
+          ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr };
+          assert(Map.find(item) != Map.end() && "No predecessor Value!");
+          PHI->addIncoming(ST.Get(Map[item]), B);
+        }
+      }
+      // Simplify if possible.
+      Map[Current] = ST.Simplify(V);
+    }
+  }
+
+  /// Starting from value recursively iterates over predecessors up to known
+  /// ending values represented in a map. For each traversed block inserts
+  /// a placeholder Phi or Select.
+  /// Reports all new created Phi/Select nodes by adding them to set.
+  /// Also reports and order in what basic blocks have been traversed.
+  void InsertPlaceholders(FoldAddrToValueMapping &Map,
+                          SmallVectorImpl<ValueInBB> &TraverseOrder,
+                          SmallPtrSetImpl<PHINode *> &NewPhiNodes,
+                          SmallPtrSetImpl<SelectInst *> &NewSelectNodes) {
+    SmallVector<ValueInBB, 32> Worklist;
+    assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
+           "Address must be a Phi or Select node");
+    auto *Dummy = UndefValue::get(CommonType);
+    Worklist.push_back(Original);
+    while (!Worklist.empty()) {
+      auto Current = Worklist.pop_back_val();
+      // If value is not an instruction it is something global, constant,
+      // parameter and we can say that this value is observable in any block.
+      // Set block to null to denote it.
+      // Also please take into account that it is how we build anchors.
+      if (!isa<Instruction>(Current.first))
+        Current.second = nullptr;
+      // if it is already visited or it is an ending value then skip it.
+      if (Map.find(Current) != Map.end())
+        continue;
+      TraverseOrder.push_back(Current);
+
+      Value *CurrentValue = Current.first;
+      BasicBlock *CurrentBlock = Current.second;
+      // CurrentValue must be a Phi node or select. All others must be covered
+      // by anchors.
+      Instruction *CurrentI = cast<Instruction>(CurrentValue);
+      bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
+
+      unsigned PredCount =
+          std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock));
+      // if Current Value is not defined in this basic block we are interested
+      // in values in predecessors.
+      if (!IsDefinedInThisBB) {
+        assert(PredCount && "Unreachable block?!");
+        PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
+                                       &CurrentBlock->front());
+        Map[Current] = PHI;
+        NewPhiNodes.insert(PHI);
+        // Add all predecessors in work list.
+        for (auto B : predecessors(CurrentBlock))
+          Worklist.push_back({ CurrentValue, B });
+        continue;
+      }
+      // Value is defined in this basic block.
+      if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) {
+        // Is it OK to get metadata from OrigSelect?!
+        // Create a Select placeholder with dummy value.
+        SelectInst *Select =
+            SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
+                               OrigSelect->getName(), OrigSelect, OrigSelect);
+        Map[Current] = Select;
+        NewSelectNodes.insert(Select);
+        // We are interested in True and False value in this basic block.
+        Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
+        Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
+      } else {
+        // It must be a Phi node then.
+        auto *CurrentPhi = cast<PHINode>(CurrentI);
+        // Create new Phi node for merge of bases.
+        assert(PredCount && "Unreachable block?!");
+        PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
+                                       &CurrentBlock->front());
+        Map[Current] = PHI;
+        NewPhiNodes.insert(PHI);
+
+        // Add all predecessors in work list.
+        for (auto B : predecessors(CurrentBlock))
+          Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B });
+      }
+    }
+  }
+
+  bool addrModeCombiningAllowed() {
+    if (DisableComplexAddrModes)
+      return false;
+    switch (DifferentField) {
+    default:
+      return false;
+    case ExtAddrMode::BaseRegField:
+      return AddrSinkCombineBaseReg;
+    case ExtAddrMode::BaseGVField:
+      return AddrSinkCombineBaseGV;
+    case ExtAddrMode::BaseOffsField:
+      return AddrSinkCombineBaseOffs;
+    case ExtAddrMode::ScaledRegField:
+      return AddrSinkCombineScaledReg;
+    }
+  }
+};
+} // end anonymous namespace
+
 /// Try adding ScaleReg*Scale to the current addressing mode.
 /// Return true and update AddrMode if this addr mode is legal for the target,
 /// false if not.
@@ -3294,6 +3217,8 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI,
       ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
 }
 
+namespace {
+
 /// \brief Hepler class to perform type promotion.
 class TypePromotionHelper {
   /// \brief Utility function to check whether or not a sign or zero extension
@@ -3370,12 +3295,13 @@ class TypePromotionHelper {
 
 public:
   /// Type for the utility function that promotes the operand of Ext.
-  typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
-                           InstrToOrigTy &PromotedInsts,
-                           unsigned &CreatedInstsCost,
-                           SmallVectorImpl<Instruction *> *Exts,
-                           SmallVectorImpl<Instruction *> *Truncs,
-                           const TargetLowering &TLI);
+  using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
+                            InstrToOrigTy &PromotedInsts,
+                            unsigned &CreatedInstsCost,
+                            SmallVectorImpl<Instruction *> *Exts,
+                            SmallVectorImpl<Instruction *> *Truncs,
+                            const TargetLowering &TLI);
+
   /// \brief Given a sign/zero extend instruction \p Ext, return the approriate
   /// action to promote the operand of \p Ext instead of using Ext.
   /// \return NULL if no promotable action is possible with the current
@@ -3390,6 +3316,8 @@ public:
                           const InstrToOrigTy &PromotedInsts);
 };
 
+} // end anonymous namespace
+
 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
                                         Type *ConsideredExtType,
                                         const InstrToOrigTy &PromotedInsts,
@@ -3488,7 +3416,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
 }
 
 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
-    llvm::Instruction *SExt, TypePromotionTransaction &TPT,
+    Instruction *SExt, TypePromotionTransaction &TPT,
     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
     SmallVectorImpl<Instruction *> *Exts,
     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
@@ -3552,9 +3480,8 @@ Value *TypePromotionHelper::promoteOperandForOther(
     // Create the truncate now.
     Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
     if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
-      ITrunc->removeFromParent();
       // Insert it just after the definition.
-      ITrunc->insertAfter(ExtOpnd);
+      ITrunc->moveAfter(ExtOpnd);
       if (Truncs)
         Truncs->push_back(ITrunc);
     }
@@ -3752,7 +3679,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
   case Instruction::Shl: {
     // Can only handle X*C and X << C.
     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
-    if (!RHS)
+    if (!RHS || RHS->getBitWidth() > 64)
       return false;
     int64_t Scale = RHS->getSExtValue();
     if (Opcode == Instruction::Shl)
@@ -4234,8 +4161,6 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
   return true;
 }
 
-} // end anonymous namespace
-
 /// Return true if the specified values are defined in a
 /// different basic block than BB.
 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
@@ -4273,13 +4198,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   SmallPtrSet<Value*, 16> Visited;
   worklist.push_back(Addr);
 
-  // Use a worklist to iteratively look through PHI nodes, and ensure that
-  // the addressing mode obtained from the non-PHI roots of the graph
-  // are equivalent.
-  bool AddrModeFound = false;
-  bool PhiSeen = false;
+  // Use a worklist to iteratively look through PHI and select nodes, and
+  // ensure that the addressing mode obtained from the non-PHI/select roots of
+  // the graph are compatible.
+  bool PhiOrSelectSeen = false;
   SmallVector<Instruction*, 16> AddrModeInsts;
-  ExtAddrMode AddrMode;
+  const SimplifyQuery SQ(*DL, TLInfo);
+  AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() });
   TypePromotionTransaction TPT(RemovedInsts);
   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
       TPT.getRestorationPoint();
@@ -4303,7 +4228,14 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     if (PHINode *P = dyn_cast<PHINode>(V)) {
       for (Value *IncValue : P->incoming_values())
         worklist.push_back(IncValue);
-      PhiSeen = true;
+      PhiOrSelectSeen = true;
+      continue;
+    }
+    // Similar for select.
+    if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+      worklist.push_back(SI->getFalseValue());
+      worklist.push_back(SI->getTrueValue());
+      PhiOrSelectSeen = true;
       continue;
     }
 
@@ -4314,30 +4246,29 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
         V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
         InsertedInsts, PromotedInsts, TPT);
+    NewAddrMode.OriginalValue = V;
 
-    if (!AddrModeFound) {
-      AddrModeFound = true;
-      AddrMode = NewAddrMode;
-      continue;
-    }
-    if (NewAddrMode == AddrMode)
-      continue;
-
-    AddrModeFound = false;
-    break;
+    if (!AddrModes.addNewAddrMode(NewAddrMode))
+      break;
   }
 
-  // If the addressing mode couldn't be determined, or if multiple different
-  // ones were determined, bail out now.
-  if (!AddrModeFound) {
+  // Try to combine the AddrModes we've collected. If we couldn't collect any,
+  // or we have multiple but either couldn't combine them or combining them
+  // wouldn't do anything useful, bail out now.
+  if (!AddrModes.combineAddrModes()) {
     TPT.rollback(LastKnownGood);
     return false;
   }
   TPT.commit();
 
+  // Get the combined AddrMode (or the only AddrMode, if we only had one).
+  ExtAddrMode AddrMode = AddrModes.getAddrMode();
+
   // If all the instructions matched are already in this BB, don't do anything.
-  // If we saw Phi node then it is not local definitely.
-  if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) {
+  // If we saw a Phi node then it is not local definitely, and if we saw a select
+  // then we want to push the address calculation past it even if it's already
+  // in this BB.
+  if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
         return IsNonLocalValue(V, MemoryInst->getParent());
                   })) {
     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
@@ -4351,9 +4282,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
 
   // Now that we determined the addressing expression we want to use and know
   // that we have to sink it into this block.  Check to see if we have already
-  // done this for some other load/store instr in this block.  If so, reuse the
-  // computation.
-  Value *&SunkAddr = SunkAddrs[Addr];
+  // done this for some other load/store instr in this block.  If so, reuse
+  // the computation.  Before attempting reuse, check if the address is valid
+  // as it may have been erased.
+
+  WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
+
+  Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
   if (SunkAddr) {
     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                  << *MemoryInst << "\n");
@@ -4578,6 +4513,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   }
 
   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+  // Store the newly computed address into the cache. In the case we reused a
+  // value, this should be idempotent.
+  SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
 
   // If we have no uses, recursively delete the value and all dead instructions
   // using it.
@@ -4909,8 +4847,7 @@ bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
     assert(LI && ExtFedByLoad && "Expect a valid load and extension");
     TPT.commit();
     // Move the extend into the same block as the load
-    ExtFedByLoad->removeFromParent();
-    ExtFedByLoad->insertAfter(LI);
+    ExtFedByLoad->moveAfter(LI);
     // CGP does not check if the zext would be speculatively executed when moved
     // to the same basic block as the load. Preserving its original location
     // would pessimize the debugging experience, as well as negatively impact
@@ -5127,10 +5064,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
 // b2:
 //   x = phi x1', x2'
 //   y = and x, 0xff
-//
-
 bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
-
   if (!Load->isSimple() ||
       !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
     return false;
@@ -5169,7 +5103,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
     }
 
     switch (I->getOpcode()) {
-    case llvm::Instruction::And: {
+    case Instruction::And: {
       auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
       if (!AndC)
         return false;
@@ -5183,7 +5117,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
       break;
     }
 
-    case llvm::Instruction::Shl: {
+    case Instruction::Shl: {
       auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
       if (!ShlC)
         return false;
@@ -5192,7 +5126,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
       break;
     }
 
-    case llvm::Instruction::Trunc: {
+    case Instruction::Trunc: {
       EVT TruncVT = TLI->getValueType(*DL, I->getType());
       unsigned TruncBitWidth = TruncVT.getSizeInBits();
       DemandBits.setLowBits(TruncBitWidth);
@@ -5596,6 +5530,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
 
 
 namespace {
+
 /// \brief Helper class to promote a scalar operation to a vector one.
 /// This class is used to move downward extractelement transition.
 /// E.g.,
@@ -5623,12 +5558,15 @@ class VectorPromoteHelper {
 
   /// The transition being moved downwards.
   Instruction *Transition;
+
   /// The sequence of instructions to be promoted.
   SmallVector<Instruction *, 4> InstsToBePromoted;
+
   /// Cost of combining a store and an extract.
   unsigned StoreExtractCombineCost;
+
   /// Instruction that will be combined with the transition.
-  Instruction *CombineInst;
+  Instruction *CombineInst = nullptr;
 
   /// \brief The instruction that represents the current end of the transition.
   /// Since we are faking the promotion until we reach the end of the chain
@@ -5734,7 +5672,7 @@ class VectorPromoteHelper {
   /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
   /// used at the index of the extract.
   Value *getConstantVector(Constant *Val, bool UseSplat) const {
-    unsigned ExtractIdx = UINT_MAX;
+    unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
     if (!UseSplat) {
       // If we cannot determine where the constant must be, we have to
       // use a splat constant.
@@ -5788,7 +5726,7 @@ public:
                       const TargetTransformInfo &TTI, Instruction *Transition,
                       unsigned CombineCost)
       : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
-        StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
+        StoreExtractCombineCost(CombineCost) {
     assert(Transition && "Do not know how to promote null");
   }
 
@@ -5863,7 +5801,8 @@ public:
     return true;
   }
 };
-} // End of anonymous namespace.
+
+} // end anonymous namespace
 
 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
   // At this point, we know that all the operands of ToBePromoted but Def
@@ -5902,8 +5841,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
                        "this?");
     ToBePromoted->setOperand(U.getOperandNo(), NewVal);
   }
-  Transition->removeFromParent();
-  Transition->insertAfter(ToBePromoted);
+  Transition->moveAfter(ToBePromoted);
   Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
 }
 
@@ -5911,7 +5849,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
 /// Try to push the extractelement towards the stores when the target
 /// has this feature and this is profitable.
 bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
-  unsigned CombineCost = UINT_MAX;
+  unsigned CombineCost = std::numeric_limits<unsigned>::max();
   if (DisableStoreExtract || !TLI ||
       (!StressStoreExtract &&
        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
@@ -6073,6 +6011,170 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
   return true;
 }
 
+// Return true if the GEP has two operands, the first operand is of a sequential
+// type, and the second operand is a constant.
+static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
+  gep_type_iterator I = gep_type_begin(*GEP);
+  return GEP->getNumOperands() == 2 &&
+      I.isSequential() &&
+      isa<ConstantInt>(GEP->getOperand(1));
+}
+
+// Try unmerging GEPs to reduce liveness interference (register pressure) across
+// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
+// reducing liveness interference across those edges benefits global register
+// allocation. Currently handles only certain cases.
+//
+// For example, unmerge %GEPI and %UGEPI as below.
+//
+// ---------- BEFORE ----------
+// SrcBlock:
+//   ...
+//   %GEPIOp = ...
+//   ...
+//   %GEPI = gep %GEPIOp, Idx
+//   ...
+//   indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
+//   (* %GEPI is alive on the indirectbr edges due to other uses ahead)
+//   (* %GEPIOp is alive on the indirectbr edges only because of it's used by
+//   %UGEPI)
+//
+// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
+// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
+// ...
+//
+// DstBi:
+//   ...
+//   %UGEPI = gep %GEPIOp, UIdx
+// ...
+// ---------------------------
+//
+// ---------- AFTER ----------
+// SrcBlock:
+//   ... (same as above)
+//    (* %GEPI is still alive on the indirectbr edges)
+//    (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
+//    unmerging)
+// ...
+//
+// DstBi:
+//   ...
+//   %UGEPI = gep %GEPI, (UIdx-Idx)
+//   ...
+// ---------------------------
+//
+// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
+// no longer alive on them.
+//
+// We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging
+// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
+// not to disable further simplications and optimizations as a result of GEP
+// merging.
+//
+// Note this unmerging may increase the length of the data flow critical path
+// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
+// between the register pressure and the length of data-flow critical
+// path. Restricting this to the uncommon IndirectBr case would minimize the
+// impact of potentially longer critical path, if any, and the impact on compile
+// time.
+static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
+                                             const TargetTransformInfo *TTI) {
+  BasicBlock *SrcBlock = GEPI->getParent();
+  // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
+  // (non-IndirectBr) cases exit early here.
+  if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
+    return false;
+  // Check that GEPI is a simple gep with a single constant index.
+  if (!GEPSequentialConstIndexed(GEPI))
+    return false;
+  ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
+  // Check that GEPI is a cheap one.
+  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+      > TargetTransformInfo::TCC_Basic)
+    return false;
+  Value *GEPIOp = GEPI->getOperand(0);
+  // Check that GEPIOp is an instruction that's also defined in SrcBlock.
+  if (!isa<Instruction>(GEPIOp))
+    return false;
+  auto *GEPIOpI = cast<Instruction>(GEPIOp);
+  if (GEPIOpI->getParent() != SrcBlock)
+    return false;
+  // Check that GEP is used outside the block, meaning it's alive on the
+  // IndirectBr edge(s).
+  if (find_if(GEPI->users(), [&](User *Usr) {
+        if (auto *I = dyn_cast<Instruction>(Usr)) {
+          if (I->getParent() != SrcBlock) {
+            return true;
+          }
+        }
+        return false;
+      }) == GEPI->users().end())
+    return false;
+  // The second elements of the GEP chains to be unmerged.
+  std::vector<GetElementPtrInst *> UGEPIs;
+  // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
+  // on IndirectBr edges.
+  for (User *Usr : GEPIOp->users()) {
+    if (Usr == GEPI) continue;
+    // Check if Usr is an Instruction. If not, give up.
+    if (!isa<Instruction>(Usr))
+      return false;
+    auto *UI = cast<Instruction>(Usr);
+    // Check if Usr in the same block as GEPIOp, which is fine, skip.
+    if (UI->getParent() == SrcBlock)
+      continue;
+    // Check if Usr is a GEP. If not, give up.
+    if (!isa<GetElementPtrInst>(Usr))
+      return false;
+    auto *UGEPI = cast<GetElementPtrInst>(Usr);
+    // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
+    // the pointer operand to it. If so, record it in the vector. If not, give
+    // up.
+    if (!GEPSequentialConstIndexed(UGEPI))
+      return false;
+    if (UGEPI->getOperand(0) != GEPIOp)
+      return false;
+    if (GEPIIdx->getType() !=
+        cast<ConstantInt>(UGEPI->getOperand(1))->getType())
+      return false;
+    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+        > TargetTransformInfo::TCC_Basic)
+      return false;
+    UGEPIs.push_back(UGEPI);
+  }
+  if (UGEPIs.size() == 0)
+    return false;
+  // Check the materializing cost of (Uidx-Idx).
+  for (GetElementPtrInst *UGEPI : UGEPIs) {
+    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+    APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
+    unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+    if (ImmCost > TargetTransformInfo::TCC_Basic)
+      return false;
+  }
+  // Now unmerge between GEPI and UGEPIs.
+  for (GetElementPtrInst *UGEPI : UGEPIs) {
+    UGEPI->setOperand(0, GEPI);
+    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+    Constant *NewUGEPIIdx =
+        ConstantInt::get(GEPIIdx->getType(),
+                         UGEPIIdx->getValue() - GEPIIdx->getValue());
+    UGEPI->setOperand(1, NewUGEPIIdx);
+    // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
+    // inbounds to avoid UB.
+    if (!GEPI->isInBounds()) {
+      UGEPI->setIsInBounds(false);
+    }
+  }
+  // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
+  // alive on IndirectBr edges).
+  assert(find_if(GEPIOp->users(), [&](User *Usr) {
+        return cast<Instruction>(Usr)->getParent() != SrcBlock;
+      }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
+  return true;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -6186,6 +6288,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
       optimizeInst(NC, ModifiedDT);
       return true;
     }
+    if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
+      return true;
+    }
     return false;
   }
 
@@ -6266,7 +6371,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
       Instruction *Insn = &*BI++;
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
       // Leave dbg.values that refer to an alloca alone. These
-      // instrinsics describe the address of a variable (= the alloca)
+      // intrinsics describe the address of a variable (= the alloca)
       // being taken.  They should not be moved next to the alloca
       // (and to the beginning of the scope), but rather stay close to
       // where said address is used.
@@ -6298,7 +6403,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
 /// \brief Scale down both weights to fit into uint32_t.
 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
   uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
-  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+  uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
   NewTrue = NewTrue / Scale;
   NewFalse = NewFalse / Scale;
 }
diff --git a/lib/CodeGen/CountingFunctionInserter.cpp b/lib/CodeGen/CountingFunctionInserter.cpp
deleted file mode 100644
index 7f7350f5fb5c..000000000000
--- a/lib/CodeGen/CountingFunctionInserter.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Insert calls to counter functions, such as mcount, intended to be called
-// once per function, at the beginning of each function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-using namespace llvm;
-
-namespace {
-  struct CountingFunctionInserter : public FunctionPass {
-    static char ID; // Pass identification, replacement for typeid
-    CountingFunctionInserter() : FunctionPass(ID) {
-      initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry());
-    }
-    
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addPreserved<GlobalsAAWrapperPass>();
-    }
-
-    bool runOnFunction(Function &F) override {
-      std::string CountingFunctionName =
-        F.getFnAttribute("counting-function").getValueAsString();
-      if (CountingFunctionName.empty())
-        return false;
-
-      Type *VoidTy = Type::getVoidTy(F.getContext());
-      Constant *CountingFn =
-        F.getParent()->getOrInsertFunction(CountingFunctionName,
-                                           VoidTy);
-      CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
-      return true;
-    }
-  };
-  
-  char CountingFunctionInserter::ID = 0;
-}
-
-INITIALIZE_PASS(CountingFunctionInserter, "cfinserter", 
-                "Inserts calls to mcount-like functions", false, false)
-
-//===----------------------------------------------------------------------===//
-//
-// CountingFunctionInserter - Give any unnamed non-void instructions "tmp" names.
-//
-FunctionPass *llvm::createCountingFunctionInserterPass() {
-  return new CountingFunctionInserter();
-}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a3cf2846d2f5..98e22b24d37a 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
+//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,14 +14,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <map>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -35,8 +50,7 @@ CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
       Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
       DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
 
-CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
-}
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
 
 void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   const unsigned BBSize = BB->size();
@@ -156,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
   // FIXME: The issue with predicated instruction is more complex. We are being
   // conservative here because the kill markers cannot be trusted after
   // if-conversion:
-  // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
   // ...
-  // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
-  // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
-  // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
+  // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
+  // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
   //
   // The first R6 kill is not really a kill since it's killed by a predicated
   // instruction which may not be executed. The second R6 def may or may not
@@ -333,8 +347,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
 bool
 CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
                                                 RegRefIter RegRefEnd,
-                                                unsigned NewReg)
-{
+                                                unsigned NewReg) {
   for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
     MachineOperand *RefOper = I->second;
 
@@ -381,8 +394,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
                          unsigned AntiDepReg,
                          unsigned LastNewReg,
                          const TargetRegisterClass *RC,
-                         SmallVectorImpl<unsigned> &Forbid)
-{
+                         SmallVectorImpl<unsigned> &Forbid) {
   ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
   for (unsigned i = 0; i != Order.size(); ++i) {
     unsigned NewReg = Order[i];
@@ -423,7 +435,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin,
 }
 
 unsigned CriticalAntiDepBreaker::
-BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+BreakAntiDependencies(const std::vector<SUnit> &SUnits,
                       MachineBasicBlock::iterator Begin,
                       MachineBasicBlock::iterator End,
                       unsigned InsertPosIndex,
@@ -436,7 +448,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
   // This is used for updating debug information.
   //
   // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
-  DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+  DenseMap<MachineInstr *, const SUnit *> MISUnitMap;
 
   // Find the node at the bottom of the critical path.
   const SUnit *Max = nullptr;
@@ -454,7 +466,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
     DEBUG(dbgs() << "Available regs:");
     for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
       if (KillIndices[Reg] == ~0u)
-        DEBUG(dbgs() << " " << TRI->getName(Reg));
+        DEBUG(dbgs() << " " << printReg(Reg, TRI));
     }
     DEBUG(dbgs() << '\n');
   }
@@ -634,9 +646,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
                                                      LastNewReg[AntiDepReg],
                                                      RC, ForbidRegs)) {
         DEBUG(dbgs() << "Breaking anti-dependence edge on "
-              << TRI->getName(AntiDepReg)
-              << " with " << RegRefs.count(AntiDepReg) << " references"
-              << " using " << TRI->getName(NewReg) << "!\n");
+                     << printReg(AntiDepReg, TRI) << " with "
+                     << RegRefs.count(AntiDepReg) << " references"
+                     << " using " << printReg(NewReg, TRI) << "!\n");
 
         // Update the references to the old register to refer to the new
         // register.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 678779fa1a26..09c4423a2f05 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,16 +18,21 @@
 
 #include "AntiDepBreaker.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+#include <vector>
 
 namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
 class RegisterClassInfo;
 class TargetInstrInfo;
+class TargetRegisterClass;
 class TargetRegisterInfo;
-class MachineFunction;
 
 class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
     MachineFunction& MF;
@@ -46,12 +51,13 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
     /// corresponding value is null. If the register is live but used in
     /// multiple register classes, the corresponding value is -1 casted to a
     /// pointer.
-    std::vector<const TargetRegisterClass*> Classes;
+    std::vector<const TargetRegisterClass *> Classes;
 
     /// Map registers to all their references within a live range.
     std::multimap<unsigned, MachineOperand *> RegRefs;
-    typedef std::multimap<unsigned, MachineOperand *>::const_iterator
-      RegRefIter;
+
+    using RegRefIter =
+        std::multimap<unsigned, MachineOperand *>::const_iterator;
 
     /// The index of the most recent kill (proceeding bottom-up),
     /// or ~0u if the register is not live.
@@ -66,7 +72,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
     BitVector KeepRegs;
 
   public:
-    CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+    CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI);
     ~CriticalAntiDepBreaker() override;
 
     /// Initialize anti-dep breaking for a new basic block.
@@ -74,7 +80,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
 
     /// Identifiy anti-dependencies along the critical path
     /// of the ScheduleDAG and break them by renaming registers.
-    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+    unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
                                    MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex,
@@ -101,6 +107,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
                                       const TargetRegisterClass *RC,
                                       SmallVectorImpl<unsigned> &Forbid);
   };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 853b9afa1026..848db444270d 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -29,13 +29,13 @@
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
@@ -336,6 +336,38 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
   VLIWScheduler->finishBlock();
 }
 
+bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
+                               const MachineMemOperand &Op2,
+                               bool UseTBAA) const {
+  if (!Op1.getValue() || !Op2.getValue())
+    return true;
+
+  int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
+  int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
+  int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
+
+  AliasResult AAResult =
+      AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
+                               UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
+                MemoryLocation(Op2.getValue(), Overlapb,
+                               UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
+
+  return AAResult != NoAlias;
+}
+
+bool VLIWPacketizerList::alias(const MachineInstr &MI1,
+                               const MachineInstr &MI2,
+                               bool UseTBAA) const {
+  if (MI1.memoperands_empty() || MI2.memoperands_empty())
+    return true;
+
+  for (const MachineMemOperand *Op1 : MI1.memoperands())
+    for (const MachineMemOperand *Op2 : MI2.memoperands())
+      if (alias(*Op1, *Op2, UseTBAA))
+        return true;
+  return false;
+}
+
 // Add a DAG mutation object to the ordered list.
 void VLIWPacketizerList::addMutation(
       std::unique_ptr<ScheduleDAGMutation> Mutation) {
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 91d18e2bcaa6..e6a54bb300f2 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -15,11 +15,10 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -95,7 +94,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
 }
 
 bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   bool AnyChanges = false;
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index ab9a0592e017..7d7eb57352a2 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -17,12 +17,12 @@
 /// when subregisters are involved.
 ///
 /// Example:
-///    %vreg0 = some definition
-///    %vreg1 = IMPLICIT_DEF
-///    %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1
-///    %vreg3 = EXTRACT_SUBREG %vreg2, sub1
-///           = use %vreg3
-/// The %vreg0 definition is dead and %vreg3 contains an undefined value.
+///    %0 = some definition
+///    %1 = IMPLICIT_DEF
+///    %2 = REG_SEQUENCE %0, sub0, %1, sub1
+///    %3 = EXTRACT_SUBREG %2, sub1
+///       = use %3
+/// The %0 definition is dead and %3 contains an undefined value.
 //
 //===----------------------------------------------------------------------===//
 
@@ -34,14 +34,13 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/PassRegistry.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -526,7 +525,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
     for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
       unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
       const VRegInfo &Info = VRegInfos[RegIdx];
-      dbgs() << PrintReg(Reg, nullptr)
+      dbgs() << printReg(Reg, nullptr)
              << " Used: " << PrintLaneMask(Info.UsedLanes)
              << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
     }
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 2f833260bca2..39d80c0bf9bd 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//===- DwarfEHPrepare - Prepare exception handling for code generation ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,20 +13,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <cstddef>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "dwarfehprepare"
@@ -34,12 +43,13 @@ using namespace llvm;
 STATISTIC(NumResumesLowered, "Number of resume calls lowered");
 
 namespace {
+
   class DwarfEHPrepare : public FunctionPass {
     // RewindFunction - _Unwind_Resume or the target equivalent.
-    Constant *RewindFunction;
+    Constant *RewindFunction = nullptr;
 
-    DominatorTree *DT;
-    const TargetLowering *TLI;
+    DominatorTree *DT = nullptr;
+    const TargetLowering *TLI = nullptr;
 
     bool InsertUnwindResumeCalls(Function &Fn);
     Value *GetExceptionObject(ResumeInst *RI);
@@ -51,9 +61,7 @@ namespace {
   public:
     static char ID; // Pass identification, replacement for typeid.
 
-    DwarfEHPrepare()
-        : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) {
-    }
+    DwarfEHPrepare() : FunctionPass(ID) {}
 
     bool runOnFunction(Function &Fn) override;
 
@@ -68,9 +76,11 @@ namespace {
       return "Exception handling preparation";
     }
   };
+
 } // end anonymous namespace
 
 char DwarfEHPrepare::ID = 0;
+
 INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE,
                       "Prepare DWARF exceptions", false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -162,7 +172,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
       BasicBlock *BB = RI->getParent();
       new UnreachableInst(Ctx, RI);
       RI->eraseFromParent();
-      SimplifyCFG(BB, TTI, 1);
+      simplifyCFG(BB, TTI);
     }
   }
   Resumes.resize(ResumesLeft);
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 402afe75b141..6294ff450113 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -30,12 +30,12 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
   // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
   // get right.
   if (!MBB->livein_empty()) {
-    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+    DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
     return false;
   }
 
@@ -199,7 +199,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
       continue;
 
     if (++InstrCount > BlockInstrLimit && !Stress) {
-      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+      DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
                    << BlockInstrLimit << " instructions.\n");
       return false;
     }
@@ -246,7 +246,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
       if (!DefMI || DefMI->getParent() != Head)
         continue;
       if (InsertAfter.insert(DefMI).second)
-        DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+        DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI);
       if (DefMI->isTerminator()) {
         DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
         return false;
@@ -317,7 +317,7 @@ bool SSAIfConv::findInsertionPoint() {
         dbgs() << "Would clobber";
         for (SparseSet<unsigned>::const_iterator
              i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
-          dbgs() << ' ' << PrintRegUnit(*i, TRI);
+          dbgs() << ' ' << printRegUnit(*i, TRI);
         dbgs() << " live before " << *I;
       });
       continue;
@@ -361,10 +361,10 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
     if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
         Succ1->succ_begin()[0] != Tail)
       return false;
-    DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
-                 << " -> BB#" << Succ0->getNumber()
-                 << "/BB#" << Succ1->getNumber()
-                 << " -> BB#" << Tail->getNumber() << '\n');
+    DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
+                 << printMBBReference(*Succ0) << "/"
+                 << printMBBReference(*Succ1) << " -> "
+                 << printMBBReference(*Tail) << '\n');
 
     // Live-in physregs are tricky to get right when speculating code.
     if (!Tail->livein_empty()) {
@@ -372,9 +372,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
       return false;
     }
   } else {
-    DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
-                 << " -> BB#" << Succ0->getNumber()
-                 << " -> BB#" << Tail->getNumber() << '\n');
+    DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
+                 << printMBBReference(*Succ0) << " -> "
+                 << printMBBReference(*Tail) << '\n');
   }
 
   // This is a triangle or a diamond.
@@ -563,8 +563,8 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
   assert(Head->succ_empty() && "Additional head successors?");
   if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
     // Splice Tail onto the end of Head.
-    DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
-                 << " into head BB#" << Head->getNumber() << '\n');
+    DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head "
+                 << printMBBReference(*Head) << '\n');
     Head->splice(Head->end(), Tail,
                      Tail->begin(), Tail->end());
     Head->transferSuccessorsAndUpdatePHIs(Tail);
@@ -785,7 +785,7 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
 bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
                << "********** Function: " << MF.getName() << '\n');
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   // Only run if conversion if the target wants it.
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index b3a25544be39..54c53eb16312 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -80,13 +80,15 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
   O << "digraph {\n";
   for (const auto &MBB : *MF) {
     unsigned BB = MBB.getNumber();
-    O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
-      << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
-      << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+    O << "\t\"" << printMBBReference(MBB) << "\" [ shape=box ]\n"
+      << '\t' << G.getBundle(BB, false) << " -> \"" << printMBBReference(MBB)
+      << "\"\n"
+      << "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true)
+      << '\n';
     for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
            SE = MBB.succ_end(); SI != SE; ++SI)
-      O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
-        << "\" [ color=lightgray ]\n";
+      O << "\t\"" << printMBBReference(MBB) << "\" -> \""
+        << printMBBReference(**SI) << "\" [ color=lightgray ]\n";
   }
   O << "}\n";
   return O;
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index e272d25047e6..61ec3f4be1dc 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -15,11 +15,11 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -200,7 +200,7 @@ void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
         LiveRegs[rx].Def = -1;
       }
     }
-    DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+    DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
     return;
   }
 
@@ -246,7 +246,7 @@ void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
     }
   }
   DEBUG(
-      dbgs() << "BB#" << MBB->getNumber()
+      dbgs() << printMBBReference(*MBB)
              << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
 }
 
@@ -394,7 +394,7 @@ void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
       continue;
     for (int rx : regIndices(MO.getReg())) {
       // This instruction explicitly defines rx.
-      DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+      DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr
                    << '\t' << *MI);
 
       if (breakDependency) {
@@ -617,7 +617,7 @@ bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
 }
 
 bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
-  if (skipFunction(*mf.getFunction()))
+  if (skipFunction(mf.getFunction()))
     return false;
   MF = &mf;
   TII = MF->getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 324ea171293d..ec586a2caea3 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -17,9 +17,9 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "expand-isel-pseudos"
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
new file mode 100644
index 000000000000..09c808463a41
--- /dev/null
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -0,0 +1,825 @@
+//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to expand memcmp() calls into optimally-sized loads and
+// compares for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "expandmemcmp"
+
+STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
+STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
+STATISTIC(NumMemCmpGreaterThanMax,
+          "Number of memcmp calls with size greater than max size");
+STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
+
+static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
+    cl::desc("The number of loads per basic block for inline expansion of "
+             "memcmp that is only being compared against zero."));
+
+namespace {
+
+
+// This class provides helper functions to expand a memcmp library call into an
+// inline expansion.
+class MemCmpExpansion {
+  struct ResultBlock {
+    BasicBlock *BB = nullptr;
+    PHINode *PhiSrc1 = nullptr;
+    PHINode *PhiSrc2 = nullptr;
+
+    ResultBlock() = default;
+  };
+
+  CallInst *const CI;
+  ResultBlock ResBlock;
+  const uint64_t Size;
+  unsigned MaxLoadSize;
+  uint64_t NumLoadsNonOneByte;
+  const uint64_t NumLoadsPerBlock;
+  std::vector<BasicBlock *> LoadCmpBlocks;
+  BasicBlock *EndBlock;
+  PHINode *PhiRes;
+  const bool IsUsedForZeroCmp;
+  const DataLayout &DL;
+  IRBuilder<> Builder;
+  // Represents the decomposition in blocks of the expansion. For example,
+  // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
+  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}.
+  // TODO(courbet): Involve the target more in this computation. On X86, 7
+  // bytes can be done more efficiently with two overlaping 4-byte loads than
+  // covering the interval with [{4, 0},{2, 4},{1, 6}}.
+  struct LoadEntry {
+    LoadEntry(unsigned LoadSize, uint64_t Offset)
+        : LoadSize(LoadSize), Offset(Offset) {
+      assert(Offset % LoadSize == 0 && "invalid load entry");
+    }
+
+    uint64_t getGEPIndex() const { return Offset / LoadSize; }
+
+    // The size of the load for this block, in bytes.
+    const unsigned LoadSize;
+    // The offset of this load WRT the base pointer, in bytes.
+    const uint64_t Offset;
+  };
+  SmallVector<LoadEntry, 8> LoadSequence;
+
+  void createLoadCmpBlocks();
+  void createResultBlock();
+  void setupResultBlockPHINodes();
+  void setupEndBlockPHINodes();
+  Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
+  void emitLoadCompareBlock(unsigned BlockIndex);
+  void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
+                                         unsigned &LoadIndex);
+  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
+  void emitMemCmpResultBlock();
+  Value *getMemCmpExpansionZeroCase();
+  Value *getMemCmpEqZeroOneBlock();
+  Value *getMemCmpOneBlock();
+
+ public:
+  MemCmpExpansion(CallInst *CI, uint64_t Size,
+                  const TargetTransformInfo::MemCmpExpansionOptions &Options,
+                  unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+                  unsigned NumLoadsPerBlock, const DataLayout &DL);
+
+  unsigned getNumBlocks();
+  uint64_t getNumLoads() const { return LoadSequence.size(); }
+
+  Value *getMemCmpExpansion();
+};
+
+// Initialize the basic block structure required for expansion of memcmp call
+// with given maximum load size and memcmp size parameter.
+// This structure includes:
+// 1. A list of load compare blocks - LoadCmpBlocks.
+// 2. An EndBlock, split from original instruction point, which is the block to
+// return from.
+// 3. ResultBlock, block to branch to for early exit when a
+// LoadCmpBlock finds a difference.
+MemCmpExpansion::MemCmpExpansion(
+    CallInst *const CI, uint64_t Size,
+    const TargetTransformInfo::MemCmpExpansionOptions &Options,
+    const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
+    const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout)
+    : CI(CI),
+      Size(Size),
+      MaxLoadSize(0),
+      NumLoadsNonOneByte(0),
+      NumLoadsPerBlock(NumLoadsPerBlock),
+      IsUsedForZeroCmp(IsUsedForZeroCmp),
+      DL(TheDataLayout),
+      Builder(CI) {
+  assert(Size > 0 && "zero blocks");
+  // Scale the max size down if the target can load more bytes than we need.
+  size_t LoadSizeIndex = 0;
+  while (LoadSizeIndex < Options.LoadSizes.size() &&
+         Options.LoadSizes[LoadSizeIndex] > Size) {
+    ++LoadSizeIndex;
+  }
+  this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
+  // Compute the decomposition.
+  uint64_t CurSize = Size;
+  uint64_t Offset = 0;
+  while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
+    const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
+    assert(LoadSize > 0 && "zero load size");
+    const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
+    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+      // Do not expand if the total number of loads is larger than what the
+      // target allows. Note that it's important that we exit before completing
+      // the expansion to avoid using a ton of memory to store the expansion for
+      // large sizes.
+      LoadSequence.clear();
+      return;
+    }
+    if (NumLoadsForThisSize > 0) {
+      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+        LoadSequence.push_back({LoadSize, Offset});
+        Offset += LoadSize;
+      }
+      if (LoadSize > 1) {
+        ++NumLoadsNonOneByte;
+      }
+      CurSize = CurSize % LoadSize;
+    }
+    ++LoadSizeIndex;
+  }
+  assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+}
+
+unsigned MemCmpExpansion::getNumBlocks() {
+  if (IsUsedForZeroCmp)
+    return getNumLoads() / NumLoadsPerBlock +
+           (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0);
+  return getNumLoads();
+}
+
+void MemCmpExpansion::createLoadCmpBlocks() {
+  for (unsigned i = 0; i < getNumBlocks(); i++) {
+    BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
+                                        EndBlock->getParent(), EndBlock);
+    LoadCmpBlocks.push_back(BB);
+  }
+}
+
+void MemCmpExpansion::createResultBlock() {
+  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
+                                   EndBlock->getParent(), EndBlock);
+}
+
+// This function creates the IR instructions for loading and comparing 1 byte.
+// It loads 1 byte from each source of the memcmp parameters with the given
+// GEPIndex. It then subtracts the two loaded values and adds this result to the
+// final phi node for selecting the memcmp result.
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
+                                               unsigned GEPIndex) {
+  Value *Source1 = CI->getArgOperand(0);
+  Value *Source2 = CI->getArgOperand(1);
+
+  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+  Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
+  // Cast source to LoadSizeType*.
+  if (Source1->getType() != LoadSizeType)
+    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+  if (Source2->getType() != LoadSizeType)
+    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+  // Get the base address using the GEPIndex.
+  if (GEPIndex != 0) {
+    Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+                                ConstantInt::get(LoadSizeType, GEPIndex));
+    Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+                                ConstantInt::get(LoadSizeType, GEPIndex));
+  }
+
+  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+  LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
+  LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
+  Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+
+  PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
+
+  if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
+    // Early exit branch if difference found to EndBlock. Otherwise, continue to
+    // next LoadCmpBlock,
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
+                                    ConstantInt::get(Diff->getType(), 0));
+    BranchInst *CmpBr =
+        BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+    Builder.Insert(CmpBr);
+  } else {
+    // The last block has an unconditional branch to EndBlock.
+    BranchInst *CmpBr = BranchInst::Create(EndBlock);
+    Builder.Insert(CmpBr);
+  }
+}
+
+/// Generate an equality comparison for one or more pairs of loaded values.
+/// This is used in the case where the memcmp() call is compared equal or not
+/// equal to zero.
+Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
+                                            unsigned &LoadIndex) {
+  assert(LoadIndex < getNumLoads() &&
+         "getCompareLoadPairs() called with no remaining loads");
+  std::vector<Value *> XorList, OrList;
+  Value *Diff;
+
+  const unsigned NumLoads =
+      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock);
+
+  // For a single-block expansion, start inserting before the memcmp call.
+  if (LoadCmpBlocks.empty())
+    Builder.SetInsertPoint(CI);
+  else
+    Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+
+  Value *Cmp = nullptr;
+  // If we have multiple loads per block, we need to generate a composite
+  // comparison using xor+or. The type for the combinations is the largest load
+  // type.
+  IntegerType *const MaxLoadType =
+      NumLoads == 1 ? nullptr
+                    : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+  for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
+    const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
+
+    IntegerType *LoadSizeType =
+        IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
+
+    Value *Source1 = CI->getArgOperand(0);
+    Value *Source2 = CI->getArgOperand(1);
+
+    // Cast source to LoadSizeType*.
+    if (Source1->getType() != LoadSizeType)
+      Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+    if (Source2->getType() != LoadSizeType)
+      Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+    // Get the base address using a GEP.
+    if (CurLoadEntry.Offset != 0) {
+      Source1 = Builder.CreateGEP(
+          LoadSizeType, Source1,
+          ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+      Source2 = Builder.CreateGEP(
+          LoadSizeType, Source2,
+          ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+    }
+
+    // Get a constant or load a value for each source address.
+    Value *LoadSrc1 = nullptr;
+    if (auto *Source1C = dyn_cast<Constant>(Source1))
+      LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+    if (!LoadSrc1)
+      LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+    Value *LoadSrc2 = nullptr;
+    if (auto *Source2C = dyn_cast<Constant>(Source2))
+      LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+    if (!LoadSrc2)
+      LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+    if (NumLoads != 1) {
+      if (LoadSizeType != MaxLoadType) {
+        LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+        LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
+      }
+      // If we have multiple loads per block, we need to generate a composite
+      // comparison using xor+or.
+      Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+      Diff = Builder.CreateZExt(Diff, MaxLoadType);
+      XorList.push_back(Diff);
+    } else {
+      // If there's only one load per block, we just compare the loaded values.
+      Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+    }
+  }
+
+  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
+    std::vector<Value *> OutList;
+    for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
+      Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
+      OutList.push_back(Or);
+    }
+    if (InList.size() % 2 != 0)
+      OutList.push_back(InList.back());
+    return OutList;
+  };
+
+  if (!Cmp) {
+    // Pairwise OR the XOR results.
+    OrList = pairWiseOr(XorList);
+
+    // Pairwise OR the OR results until one result left.
+    while (OrList.size() != 1) {
+      OrList = pairWiseOr(OrList);
+    }
+    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
+  }
+
+  return Cmp;
+}
+
+void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
+                                                        unsigned &LoadIndex) {
+  Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
+
+  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
+                           ? EndBlock
+                           : LoadCmpBlocks[BlockIndex + 1];
+  // Early exit branch if difference found to ResultBlock. Otherwise,
+  // continue to next LoadCmpBlock or EndBlock.
+  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+  Builder.Insert(CmpBr);
+
+  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
+  // since early exit to ResultBlock was not taken (no difference was found in
+  // any of the bytes).
+  if (BlockIndex == LoadCmpBlocks.size() - 1) {
+    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
+  }
+}
+
+// This function creates the IR intructions for loading and comparing using the
+// given LoadSize. It loads the number of bytes specified by LoadSize from each
+// source of the memcmp parameters. It then does a subtract to see if there was
+// a difference in the loaded values. If a difference is found, it branches
+// with an early exit to the ResultBlock for calculating which source was
+// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
+// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
+// a special case through emitLoadCompareByteBlock. The special handling can
+// simply subtract the loaded values and add it to the result phi node.
+void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
+  // There is one load per block in this case, BlockIndex == LoadIndex.
+  const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
+
+  if (CurLoadEntry.LoadSize == 1) {
+    MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
+                                              CurLoadEntry.getGEPIndex());
+    return;
+  }
+
+  Type *LoadSizeType =
+      IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
+  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+  assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
+
+  Value *Source1 = CI->getArgOperand(0);
+  Value *Source2 = CI->getArgOperand(1);
+
+  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+  // Cast source to LoadSizeType*.
+  if (Source1->getType() != LoadSizeType)
+    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+  if (Source2->getType() != LoadSizeType)
+    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+  // Get the base address using a GEP.
+  if (CurLoadEntry.Offset != 0) {
+    Source1 = Builder.CreateGEP(
+        LoadSizeType, Source1,
+        ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+    Source2 = Builder.CreateGEP(
+        LoadSizeType, Source2,
+        ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
+  }
+
+  // Load LoadSizeType from the base address.
+  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+  if (DL.isLittleEndian()) {
+    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+                                                Intrinsic::bswap, LoadSizeType);
+    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+  }
+
+  if (LoadSizeType != MaxLoadType) {
+    LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+    LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
+  }
+
+  // Add the loaded values to the phi nodes for calculating memcmp result only
+  // if result is not used in a zero equality.
+  if (!IsUsedForZeroCmp) {
+    ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);
+    ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);
+  }
+
+  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
+  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
+                           ? EndBlock
+                           : LoadCmpBlocks[BlockIndex + 1];
+  // Early exit branch if difference found to ResultBlock. Otherwise, continue
+  // to next LoadCmpBlock or EndBlock.
+  BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
+  Builder.Insert(CmpBr);
+
+  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
+  // since early exit to ResultBlock was not taken (no difference was found in
+  // any of the bytes).
+  if (BlockIndex == LoadCmpBlocks.size() - 1) {
+    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+    PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
+  }
+}
+
+// This function populates the ResultBlock with a sequence to calculate the
+// memcmp result. It compares the two loaded source values and returns -1 if
+// src1 < src2 and 1 if src1 > src2.
+void MemCmpExpansion::emitMemCmpResultBlock() {
+  // Special case: if memcmp result is used in a zero equality, result does not
+  // need to be calculated and can simply return 1.
+  if (IsUsedForZeroCmp) {
+    BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+    Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+    Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
+    PhiRes->addIncoming(Res, ResBlock.BB);
+    BranchInst *NewBr = BranchInst::Create(EndBlock);
+    Builder.Insert(NewBr);
+    return;
+  }
+  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+  Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+
+  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
+                                  ResBlock.PhiSrc2);
+
+  Value *Res =
+      Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
+                           ConstantInt::get(Builder.getInt32Ty(), 1));
+
+  BranchInst *NewBr = BranchInst::Create(EndBlock);
+  Builder.Insert(NewBr);
+  PhiRes->addIncoming(Res, ResBlock.BB);
+}
+
+void MemCmpExpansion::setupResultBlockPHINodes() {
+  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+  Builder.SetInsertPoint(ResBlock.BB);
+  // Note: this assumes one load per block.
+  ResBlock.PhiSrc1 =
+      Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
+  ResBlock.PhiSrc2 =
+      Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
+}
+
+void MemCmpExpansion::setupEndBlockPHINodes() {
+  Builder.SetInsertPoint(&EndBlock->front());
+  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
+}
+
+Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
+  unsigned LoadIndex = 0;
+  // This loop populates each of the LoadCmpBlocks with the IR sequence to
+  // handle multiple loads per block.
+  for (unsigned I = 0; I < getNumBlocks(); ++I) {
+    emitLoadCompareBlockMultipleLoads(I, LoadIndex);
+  }
+
+  emitMemCmpResultBlock();
+  return PhiRes;
+}
+
+/// A memcmp expansion that compares equality with 0 and only has one block of
+/// load and compare can bypass the compare, branch, and phi IR that is required
+/// in the general case.
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
+  unsigned LoadIndex = 0;
+  Value *Cmp = getCompareLoadPairs(0, LoadIndex);
+  assert(LoadIndex == getNumLoads() && "some entries were not consumed");
+  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
+}
+
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock() {
+  assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+  Value *Source1 = CI->getArgOperand(0);
+  Value *Source2 = CI->getArgOperand(1);
+
+  // Cast source to LoadSizeType*.
+  if (Source1->getType() != LoadSizeType)
+    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+  if (Source2->getType() != LoadSizeType)
+    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+  // Load LoadSizeType from the base address.
+  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+  if (DL.isLittleEndian() && Size != 1) {
+    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+                                                Intrinsic::bswap, LoadSizeType);
+    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+  }
+
+  if (Size < 4) {
+    // The i8 and i16 cases don't need compares. We zext the loaded values and
+    // subtract them to get the suitable negative, zero, or positive i32 result.
+    LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
+    LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
+    return Builder.CreateSub(LoadSrc1, LoadSrc2);
+  }
+
+  // The result of memcmp is negative, zero, or positive, so produce that by
+  // subtracting 2 extended compare bits: sub (ugt, ult).
+  // If a target prefers to use selects to get -1/0/1, they should be able
+  // to transform this later. The inverse transform (going from selects to math)
+  // may not be possible in the DAG because the selects got converted into
+  // branches before we got there.
+  Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
+  Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
+  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
+  return Builder.CreateSub(ZextUGT, ZextULT);
+}
+
+// This function expands the memcmp call into an inline expansion and returns
+// the memcmp result.
+Value *MemCmpExpansion::getMemCmpExpansion() {
+  // A memcmp with zero-comparison with only one block of load and compare does
+  // not need to set up any extra blocks. This case could be handled in the DAG,
+  // but since we have all of the machinery to flexibly expand any memcpy here,
+  // we choose to handle this case too to avoid fragmented lowering.
+  if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) {
+    BasicBlock *StartBlock = CI->getParent();
+    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+    setupEndBlockPHINodes();
+    createResultBlock();
+
+    // If return value of memcmp is not used in a zero equality, we need to
+    // calculate which source was larger. The calculation requires the
+    // two loaded source values of each load compare block.
+    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+    if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
+
+    // Create the number of required load compare basic blocks.
+    createLoadCmpBlocks();
+
+    // Update the terminator added by splitBasicBlock to branch to the first
+    // LoadCmpBlock.
+    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+  }
+
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  if (IsUsedForZeroCmp)
+    return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
+                               : getMemCmpExpansionZeroCase();
+
+  // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+  if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock();
+
+  for (unsigned I = 0; I < getNumBlocks(); ++I) {
+    emitLoadCompareBlock(I);
+  }
+
+  emitMemCmpResultBlock();
+  return PhiRes;
+}
+
+// This function checks to see if an expansion of memcmp can be generated.
+// It checks for constant compare size that is less than the max inline size.
+// If an expansion cannot occur, returns false to leave as a library call.
+// Otherwise, the library call is replaced with a new IR instruction sequence.
+/// We want to transform:
+/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
+/// To:
+/// loadbb:
+///  %0 = bitcast i32* %buffer2 to i8*
+///  %1 = bitcast i32* %buffer1 to i8*
+///  %2 = bitcast i8* %1 to i64*
+///  %3 = bitcast i8* %0 to i64*
+///  %4 = load i64, i64* %2
+///  %5 = load i64, i64* %3
+///  %6 = call i64 @llvm.bswap.i64(i64 %4)
+///  %7 = call i64 @llvm.bswap.i64(i64 %5)
+///  %8 = sub i64 %6, %7
+///  %9 = icmp ne i64 %8, 0
+///  br i1 %9, label %res_block, label %loadbb1
+/// res_block:                                        ; preds = %loadbb2,
+/// %loadbb1, %loadbb
+///  %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
+///  %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
+///  %10 = icmp ult i64 %phi.src1, %phi.src2
+///  %11 = select i1 %10, i32 -1, i32 1
+///  br label %endblock
+/// loadbb1:                                          ; preds = %loadbb
+///  %12 = bitcast i32* %buffer2 to i8*
+///  %13 = bitcast i32* %buffer1 to i8*
+///  %14 = bitcast i8* %13 to i32*
+///  %15 = bitcast i8* %12 to i32*
+///  %16 = getelementptr i32, i32* %14, i32 2
+///  %17 = getelementptr i32, i32* %15, i32 2
+///  %18 = load i32, i32* %16
+///  %19 = load i32, i32* %17
+///  %20 = call i32 @llvm.bswap.i32(i32 %18)
+///  %21 = call i32 @llvm.bswap.i32(i32 %19)
+///  %22 = zext i32 %20 to i64
+///  %23 = zext i32 %21 to i64
+///  %24 = sub i64 %22, %23
+///  %25 = icmp ne i64 %24, 0
+///  br i1 %25, label %res_block, label %loadbb2
+/// loadbb2:                                          ; preds = %loadbb1
+///  %26 = bitcast i32* %buffer2 to i8*
+///  %27 = bitcast i32* %buffer1 to i8*
+///  %28 = bitcast i8* %27 to i16*
+///  %29 = bitcast i8* %26 to i16*
+///  %30 = getelementptr i16, i16* %28, i16 6
+///  %31 = getelementptr i16, i16* %29, i16 6
+///  %32 = load i16, i16* %30
+///  %33 = load i16, i16* %31
+///  %34 = call i16 @llvm.bswap.i16(i16 %32)
+///  %35 = call i16 @llvm.bswap.i16(i16 %33)
+///  %36 = zext i16 %34 to i64
+///  %37 = zext i16 %35 to i64
+///  %38 = sub i64 %36, %37
+///  %39 = icmp ne i64 %38, 0
+///  br i1 %39, label %res_block, label %loadbb3
+/// loadbb3:                                          ; preds = %loadbb2
+///  %40 = bitcast i32* %buffer2 to i8*
+///  %41 = bitcast i32* %buffer1 to i8*
+///  %42 = getelementptr i8, i8* %41, i8 14
+///  %43 = getelementptr i8, i8* %40, i8 14
+///  %44 = load i8, i8* %42
+///  %45 = load i8, i8* %43
+///  %46 = zext i8 %44 to i32
+///  %47 = zext i8 %45 to i32
+///  %48 = sub i32 %46, %47
+///  br label %endblock
+/// endblock:                                         ; preds = %res_block,
+/// %loadbb3
+///  %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
+///  ret i32 %phi.res
+static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
+                         const TargetLowering *TLI, const DataLayout *DL) {
+  NumMemCmpCalls++;
+
+  // Early exit from expansion if -Oz.
+  if (CI->getFunction()->optForMinSize())
+    return false;
+
+  // Early exit from expansion if size is not a constant.
+  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+  if (!SizeCast) {
+    NumMemCmpNotConstant++;
+    return false;
+  }
+  const uint64_t SizeVal = SizeCast->getZExtValue();
+
+  if (SizeVal == 0) {
+    return false;
+  }
+
+  // TTI call to check if target would like to expand memcmp. Also, get the
+  // available load sizes.
+  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+  if (!Options) return false;
+
+  const unsigned MaxNumLoads =
+      TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+
+  MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
+                            IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL);
+
+  // Don't expand if this will require more loads than desired by the target.
+  if (Expansion.getNumLoads() == 0) {
+    NumMemCmpGreaterThanMax++;
+    return false;
+  }
+
+  NumMemCmpInlined++;
+
+  Value *Res = Expansion.getMemCmpExpansion();
+
+  // Replace call with result of expansion and erase call.
+  CI->replaceAllUsesWith(Res);
+  CI->eraseFromParent();
+
+  return true;
+}
+
+
+
+class ExpandMemCmpPass : public FunctionPass {
+public:
+  static char ID;
+
+  ExpandMemCmpPass() : FunctionPass(ID) {
+    initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F)) return false;
+
+    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+    if (!TPC) {
+      return false;
+    }
+    const TargetLowering* TL =
+        TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
+
+    const TargetLibraryInfo *TLI =
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+    const TargetTransformInfo *TTI =
+        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    auto PA = runImpl(F, TLI, TTI, TL);
+    return !PA.areAllPreserved();
+  }
+
+private:
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
+                            const TargetTransformInfo *TTI,
+                            const TargetLowering* TL);
+  // Returns true if a change was made.
+  bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
+                  const TargetTransformInfo *TTI, const TargetLowering* TL,
+                  const DataLayout& DL);
+};
+
+bool ExpandMemCmpPass::runOnBlock(
+    BasicBlock &BB, const TargetLibraryInfo *TLI,
+    const TargetTransformInfo *TTI, const TargetLowering* TL,
+    const DataLayout& DL) {
+  for (Instruction& I : BB) {
+    CallInst *CI = dyn_cast<CallInst>(&I);
+    if (!CI) {
+      continue;
+    }
+    LibFunc Func;
+    if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
+        Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+
+PreservedAnalyses ExpandMemCmpPass::runImpl(
+    Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
+    const TargetLowering* TL) {
+  const DataLayout& DL = F.getParent()->getDataLayout();
+  bool MadeChanges = false;
+  for (auto BBIt = F.begin(); BBIt != F.end();) {
+    if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) {
+      MadeChanges = true;
+      // If changes were made, restart the function from the beginning, since
+      // the structure of the function was changed.
+      BBIt = F.begin();
+    } else {
+      ++BBIt;
+    }
+  }
+  return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+} // namespace
+
+char ExpandMemCmpPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
+                      "Expand memcmp() to load/stores", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
+                    "Expand memcmp() to load/stores", false, false)
+
+FunctionPass *llvm::createExpandMemCmpPass() {
+  return new ExpandMemCmpPass();
+}
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 4ce86f27a7dd..6ef97d6dd5ec 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -17,11 +17,11 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -104,8 +104,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
   if (DstSubReg == InsReg) {
     // No need to insert an identity copy instruction.
     // Watch out for case like this:
-    // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
-    // We must leave %RAX live.
+    // %rax = SUBREG_TO_REG 0, killed %eax, 3
+    // We must leave %rax live.
     if (DstReg != InsReg) {
       MI->setDesc(TII->get(TargetOpcode::KILL));
       MI->RemoveOperand(3);     // SubIdx
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
index 70dca3b74b2f..abf487a4f198 100644
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -95,7 +95,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
       // and it can't be handled by generating this shuffle sequence.
       // TODO: Implement scalarization of ordered reductions here for targets
       // without native support.
-      if (!II->getFastMathFlags().unsafeAlgebra())
+      if (!II->getFastMathFlags().isFast())
         continue;
       Vec = II->getArgOperand(1);
       break;
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
index 0759bf6713e0..4ddf9f92836c 100644
--- a/lib/CodeGen/FEntryInserter.cpp
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -15,11 +15,11 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -36,15 +36,13 @@ struct FEntryInserter : public MachineFunctionPass {
 
 bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
   const std::string FEntryName =
-      MF.getFunction()->getFnAttribute("fentry-call").getValueAsString();
+      MF.getFunction().getFnAttribute("fentry-call").getValueAsString();
   if (FEntryName != "true")
     return false;
 
   auto &FirstMBB = *MF.begin();
-  auto &FirstMI = *FirstMBB.begin();
-
   auto *TII = MF.getSubtarget().getInstrInfo();
-  BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+  BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
           TII->get(TargetOpcode::FENTRY_CALL));
   return true;
 }
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index 35246545ca91..4361d8b248c8 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -18,17 +18,16 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -329,10 +328,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
 
 bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
   // Quick exit for functions that do not use GC.
-  if (!MF.getFunction()->hasGC())
+  if (!MF.getFunction().hasGC())
     return false;
 
-  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction());
   MMI = &getAnalysis<MachineModuleInfo>();
   TII = MF.getSubtarget().getInstrInfo();
 
diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt
index eba7ea8132e3..2db90f8888cb 100644
--- a/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -1,34 +1,18 @@
-# List of all GlobalISel files.
-set(GLOBAL_ISEL_FILES
-      CallLowering.cpp
-      IRTranslator.cpp
-      InstructionSelect.cpp
-      InstructionSelector.cpp
-      MachineIRBuilder.cpp
-      LegalizerHelper.cpp
-      Legalizer.cpp
-      LegalizerInfo.cpp
-      Localizer.cpp
-      RegBankSelect.cpp
-      RegisterBank.cpp
-      RegisterBankInfo.cpp
-      Utils.cpp
-      )
-
-# Add GlobalISel files to the dependencies if the user wants to build it.
-if(LLVM_BUILD_GLOBAL_ISEL)
-  set(GLOBAL_ISEL_BUILD_FILES ${GLOBAL_ISEL_FILES})
-else()
-  set(GLOBAL_ISEL_BUILD_FILES"")
-  set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES})
-endif()
-
-# In LLVMBuild.txt files, it is not possible to mark a dependency to a
-# library as optional. So instead, generate an empty library if we did
-# not ask for it.
 add_llvm_library(LLVMGlobalISel
-        ${GLOBAL_ISEL_BUILD_FILES}
+        CallLowering.cpp
         GlobalISel.cpp
+        IRTranslator.cpp
+        InstructionSelect.cpp
+        InstructionSelector.cpp
+        LegalizerHelper.cpp
+        Legalizer.cpp
+        LegalizerInfo.cpp
+        Localizer.cpp
+        MachineIRBuilder.cpp
+        RegBankSelect.cpp
+        RegisterBank.cpp
+        RegisterBankInfo.cpp
+        Utils.cpp
 
         DEPENDS
         intrinsics_gen
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index be0c5c2bb70e..114c068749eb 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -16,10 +16,10 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Target/TargetLowering.h"
 
 using namespace llvm;
 
@@ -108,7 +108,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      ArrayRef<ArgInfo> Args,
                                      ValueHandler &Handler) const {
   MachineFunction &MF = MIRBuilder.getMF();
-  const Function &F = *MF.getFunction();
+  const Function &F = MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -160,10 +160,11 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
     // FIXME: bitconverting between vector types may or may not be a
     // nop in big-endian situations.
     return ValReg;
-  case CCValAssign::AExt:
+  case CCValAssign::AExt: {
     assert(!VA.getLocVT().isVector() && "unexpected vector extend");
-    // Otherwise, it's a nop.
-    return ValReg;
+    auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
+    return MIB->getOperand(0).getReg();
+  }
   case CCValAssign::SExt: {
     unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
     MIRBuilder.buildSExt(NewReg, ValReg);
diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 29d1209bb02a..00c6a9d63158 100644
--- a/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -16,13 +16,6 @@
 
 using namespace llvm;
 
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-
-void llvm::initializeGlobalISel(PassRegistry &Registry) {
-}
-
-#else
-
 void llvm::initializeGlobalISel(PassRegistry &Registry) {
   initializeIRTranslatorPass(Registry);
   initializeLegalizerPass(Registry);
@@ -30,4 +23,3 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) {
   initializeRegBankSelectPass(Registry);
   initializeInstructionSelectPass(Registry);
 }
-#endif // LLVM_BUILD_GLOBAL_ISEL
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index ed1bd995e60b..433f99b0113b 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -15,7 +15,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -26,7 +26,11 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -54,12 +58,8 @@
 #include "llvm/Support/LowLevelTypeImpl.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -124,8 +124,8 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
     bool Success = translate(*CV, VReg);
     if (!Success) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-                                 MF->getFunction()->getSubprogram(),
-                                 &MF->getFunction()->getEntryBlock());
+                                 MF->getFunction().getSubprogram(),
+                                 &MF->getFunction().getEntryBlock());
       R << "unable to translate constant: " << ore::NV("Type", Val.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
       return VReg;
@@ -238,6 +238,8 @@ bool IRTranslator::translateCompare(const User &U,
 bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   const ReturnInst &RI = cast<ReturnInst>(U);
   const Value *Ret = RI.getReturnValue();
+  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
+    Ret = nullptr;
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
@@ -337,6 +339,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
                                : MachineMemOperand::MONone;
   Flags |= MachineMemOperand::MOLoad;
 
+  if (DL->getTypeStoreSize(LI.getType()) == 0)
+    return true;
+
   unsigned Res = getOrCreateVReg(LI);
   unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
 
@@ -355,6 +360,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
                                : MachineMemOperand::MONone;
   Flags |= MachineMemOperand::MOStore;
 
+  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
+    return true;
+
   unsigned Val = getOrCreateVReg(*SI.getValueOperand());
   unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
 
@@ -583,7 +591,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
   MIB.addDef(DstReg);
 
   auto &TLI = *MF->getSubtarget().getTargetLowering();
-  Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent());
+  Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
   if (!Global)
     return;
 
@@ -593,7 +601,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
                MachineMemOperand::MODereferenceable;
   *MemRefs =
       MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
-                               DL->getPointerABIAlignment());
+                               DL->getPointerABIAlignment(0));
   MIB.setMemRefs(MemRefs, MemRefs + 1);
 }
 
@@ -682,23 +690,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     if (!V) {
       // Currently the optimizer can produce this; insert an undef to
       // help debugging.  Probably the optimizer should not do this.
-      MIRBuilder.buildIndirectDbgValue(0, DI.getOffset(), DI.getVariable(),
-                                       DI.getExpression());
+      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
     } else if (const auto *CI = dyn_cast<Constant>(V)) {
-      MIRBuilder.buildConstDbgValue(*CI, DI.getOffset(), DI.getVariable(),
-                                    DI.getExpression());
+      MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
     } else {
       unsigned Reg = getOrCreateVReg(*V);
       // FIXME: This does not handle register-indirect values at offset 0. The
       // direct/indirect thing shouldn't really be handled by something as
       // implicit as reg+noreg vs reg+imm in the first palce, but it seems
       // pretty baked in right now.
-      if (DI.getOffset() != 0)
-        MIRBuilder.buildIndirectDbgValue(Reg, DI.getOffset(), DI.getVariable(),
-                                         DI.getExpression());
-      else
-        MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(),
-                                       DI.getExpression());
+      MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
     }
     return true;
   }
@@ -850,14 +851,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
   // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
-  if (TLI.getTgtMemIntrinsic(Info, CI, ID)) {
-    MachineMemOperand::Flags Flags =
-        Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
-    Flags |=
-        Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
-    uint64_t Size = Info.memVT.getSizeInBits() >> 3;
+  if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+    uint64_t Size = Info.memVT.getStoreSize();
     MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
-                                               Flags, Size, Info.align));
+                                               Info.flags, Size, Info.align));
   }
 
   return true;
@@ -928,7 +925,7 @@ bool IRTranslator::translateLandingPad(const User &U,
   // If there aren't registers to copy the values into (e.g., during SjLj
   // exceptions), then don't bother.
   auto &TLI = *MF->getSubtarget().getTargetLowering();
-  const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn();
+  const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
   if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
       TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
     return true;
@@ -1105,7 +1102,7 @@ bool IRTranslator::translateShuffleVector(const User &U,
 
 bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
   const PHINode &PI = cast<PHINode>(U);
-  auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI);
+  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
   MIB.addDef(getOrCreateVReg(PI));
 
   PendingPHIs.emplace_back(&PI, MIB.getInstr());
@@ -1239,7 +1236,7 @@ void IRTranslator::finalizeFunction() {
 
 bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MF = &CurMF;
-  const Function &F = *MF->getFunction();
+  const Function &F = MF->getFunction();
   if (F.empty())
     return false;
   CLI = MF->getSubtarget().getCallLowering();
@@ -1252,6 +1249,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 
   assert(PendingPHIs.empty() && "stale PHIs");
 
+  if (!DL->isLittleEndian()) {
+    // Currently we don't properly handle big endian code.
+    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+                               F.getSubprogram(), &F.getEntryBlock());
+    R << "unable to translate in big endian mode";
+    reportTranslationError(*MF, *TPC, *ORE, R);
+  }
+
   // Release the per-function state when we return, whether we succeeded or not.
   auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
 
@@ -1276,12 +1281,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 
   // Lower the actual args into this basic block.
   SmallVector<unsigned, 8> VRegArgs;
-  for (const Argument &Arg: F.args())
+  for (const Argument &Arg: F.args()) {
+    if (DL->getTypeStoreSize(Arg.getType()) == 0)
+      continue; // Don't handle zero sized types.
     VRegArgs.push_back(getOrCreateVReg(Arg));
+  }
   if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-                               MF->getFunction()->getSubprogram(),
-                               &MF->getFunction()->getEntryBlock());
+                               F.getSubprogram(), &F.getEntryBlock());
     R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
     reportTranslationError(*MF, *TPC, *ORE, R);
     return false;
@@ -1298,14 +1305,18 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
       if (translate(Inst))
         continue;
 
-      std::string InstStrStorage;
-      raw_string_ostream InstStr(InstStrStorage);
-      InstStr << Inst;
-
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  Inst.getDebugLoc(), &BB);
-      R << "unable to translate instruction: " << ore::NV("Opcode", &Inst)
-        << ": '" << InstStr.str() << "'";
+      R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
+
+      if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
+        std::string InstStrStorage;
+        raw_string_ostream InstStr(InstStrStorage);
+        InstStr << Inst;
+
+        R << ": '" << InstStr.str() << "'";
+      }
+
       reportTranslationError(*MF, *TPC, *ORE, R);
       return false;
     }
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index a16e14fe2db6..422cc2219aa8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -19,18 +19,29 @@
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/config.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
 
 #define DEBUG_TYPE "instruction-select"
 
 using namespace llvm;
 
+#ifdef LLVM_GISEL_COV_PREFIX
+static cl::opt<std::string>
+    CoveragePrefix("gisel-coverage-prefix", cl::init(LLVM_GISEL_COV_PREFIX),
+                   cl::desc("Record GlobalISel rule coverage files of this "
+                            "prefix if instrumentation was generated"));
+#else
+static const std::string CoveragePrefix = "";
+#endif
+
 char InstructionSelect::ID = 0;
 INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
                       "Select target instructions out of generic instructions",
@@ -66,6 +77,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
 
   const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
   const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+  CodeGenCoverage CoverageInfo;
   assert(ISel && "Cannot work without InstructionSelector");
 
   // An optimization remark emitter. Used to report failures.
@@ -127,7 +139,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      if (!ISel->select(MI)) {
+      if (!ISel->select(MI, CoverageInfo)) {
         // FIXME: It would be nice to dump all inserted instructions.  It's
         // not obvious how, esp. considering select() can insert after MI.
         reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
@@ -177,7 +189,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
 
   if (MF.size() != NumBlocks) {
     MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure",
-                                      MF.getFunction()->getSubprogram(),
+                                      MF.getFunction().getSubprogram(),
                                       /*MBB=*/nullptr);
     R << "inserting blocks is not supported yet";
     reportGISelFailure(MF, TPC, MORE, R);
@@ -187,6 +199,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
   auto &TLI = *MF.getSubtarget().getTargetLowering();
   TLI.finalizeLowering(MF);
 
+  CoverageInfo.emit(CoveragePrefix,
+                    MF.getSubtarget()
+                        .getTargetLowering()
+                        ->getTargetMachine()
+                        .getTarget()
+                        .getBackendName());
+
   // FIXME: Should we accurately track changes?
   return true;
 }
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index bf427225d6a9..88669bd68c00 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -6,8 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
 /// \file
 /// This file implements the InstructionSelector class.
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
@@ -16,14 +18,11 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cassert>
 
 #define DEBUG_TYPE "instructionselector"
@@ -31,7 +30,7 @@
 using namespace llvm;
 
 InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
-    : Renderers(MaxRenderers, nullptr), MIs() {}
+    : Renderers(MaxRenderers), MIs() {}
 
 InstructionSelector::InstructionSelector() = default;
 
@@ -100,7 +99,30 @@ bool InstructionSelector::isOperandImmEqual(
   return false;
 }
 
-bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const {
+bool InstructionSelector::isBaseWithConstantOffset(
+    const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+  if (!Root.isReg())
+    return false;
+
+  MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+  if (RootI->getOpcode() != TargetOpcode::G_GEP)
+    return false;
+
+  MachineOperand &RHS = RootI->getOperand(2);
+  MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+  if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+    return false;
+
+  return true;
+}
+
+bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
+                                                MachineInstr &IntoMI) const {
+  // Immediate neighbours are already folded.
+  if (MI.getParent() == IntoMI.getParent() &&
+      std::next(MI.getIterator()) == IntoMI.getIterator())
+    return true;
+
   return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
          MI.implicit_operands().begin() == MI.implicit_operands().end();
 }
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index b699156c568b..f09b0d9f11e7 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -14,14 +14,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
+#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 #include <iterator>
 
@@ -50,79 +53,18 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
 void Legalizer::init(MachineFunction &MF) {
 }
 
-bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
-                              const TargetInstrInfo &TII,
-                              MachineIRBuilder &MIRBuilder) {
-  if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+static bool isArtifact(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
     return false;
-
-  unsigned NumDefs = MI.getNumOperands() - 1;
-  unsigned SrcReg = MI.getOperand(NumDefs).getReg();
-  MachineInstr &MergeI = *MRI.def_instr_begin(SrcReg);
-  if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
-    return false;
-
-  const unsigned NumMergeRegs = MergeI.getNumOperands() - 1;
-
-  if (NumMergeRegs < NumDefs) {
-    if (NumDefs % NumMergeRegs != 0)
-      return false;
-
-    MIRBuilder.setInstr(MI);
-    // Transform to UNMERGEs, for example
-    //   %1 = G_MERGE_VALUES %4, %5
-    //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
-    // to
-    //   %9, %10 = G_UNMERGE_VALUES %4
-    //   %11, %12 = G_UNMERGE_VALUES %5
-
-    const unsigned NewNumDefs = NumDefs / NumMergeRegs;
-    for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
-      SmallVector<unsigned, 2> DstRegs;
-      for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
-           ++j, ++DefIdx)
-        DstRegs.push_back(MI.getOperand(DefIdx).getReg());
-
-      MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg());
-    }
-
-  } else if (NumMergeRegs > NumDefs) {
-    if (NumMergeRegs % NumDefs != 0)
-      return false;
-
-    MIRBuilder.setInstr(MI);
-    // Transform to MERGEs
-    //   %6 = G_MERGE_VALUES %17, %18, %19, %20
-    //   %7, %8 = G_UNMERGE_VALUES %6
-    // to
-    //   %7 = G_MERGE_VALUES %17, %18
-    //   %8 = G_MERGE_VALUES %19, %20
-
-    const unsigned NumRegs = NumMergeRegs / NumDefs;
-    for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
-      SmallVector<unsigned, 2> Regs;
-      for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx)
-        Regs.push_back(MergeI.getOperand(Idx).getReg());
-
-      MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
-    }
-
-  } else {
-    // FIXME: is a COPY appropriate if the types mismatch? We know both
-    // registers are allocatable by now.
-    if (MRI.getType(MI.getOperand(0).getReg()) !=
-        MRI.getType(MergeI.getOperand(1).getReg()))
-      return false;
-
-    for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
-      MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
-                         MergeI.getOperand(Idx + 1).getReg());
+  case TargetOpcode::G_TRUNC:
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_MERGE_VALUES:
+  case TargetOpcode::G_UNMERGE_VALUES:
+    return true;
   }
-
-  MI.eraseFromParent();
-  if (MRI.use_empty(MergeI.getOperand(0).getReg()))
-    MergeI.eraseFromParent();
-  return true;
 }
 
 bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
@@ -136,79 +78,108 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
   MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
   LegalizerHelper Helper(MF);
 
-  // FIXME: an instruction may need more than one pass before it is legal. For
-  // example on most architectures <3 x i3> is doubly-illegal. It would
-  // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We
-  // probably want a worklist of instructions rather than naive iterate until
-  // convergence for performance reasons.
-  bool Changed = false;
-  MachineBasicBlock::iterator NextMI;
-  for (auto &MBB : MF) {
-    for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
-      // Get the next Instruction before we try to legalize, because there's a
-      // good chance MI will be deleted.
-      NextMI = std::next(MI);
+  const size_t NumBlocks = MF.size();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
 
+  // Populate Insts
+  GISelWorkList<256> InstList;
+  GISelWorkList<128> ArtifactList;
+  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+  // Perform legalization bottom up so we can DCE as we legalize.
+  // Traverse BB in RPOT and within each basic block, add insts top down,
+  // so when we pop_back_val in the legalization process, we traverse bottom-up.
+  for (auto *MBB : RPOT) {
+    if (MBB->empty())
+      continue;
+    for (MachineInstr &MI : *MBB) {
       // Only legalize pre-isel generic instructions: others don't have types
       // and are assumed to be legal.
-      if (!isPreISelGenericOpcode(MI->getOpcode()))
+      if (!isPreISelGenericOpcode(MI.getOpcode()))
         continue;
-      unsigned NumNewInsns = 0;
-      SmallVector<MachineInstr *, 4> WorkList;
-      Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
-        // Only legalize pre-isel generic instructions.
-        // Legalization process could generate Target specific pseudo
-        // instructions with generic types. Don't record them
-        if (isPreISelGenericOpcode(MI->getOpcode())) {
-          ++NumNewInsns;
-          WorkList.push_back(MI);
-        }
-      });
-      WorkList.push_back(&*MI);
-
-      bool Changed = false;
-      LegalizerHelper::LegalizeResult Res;
-      unsigned Idx = 0;
-      do {
-        Res = Helper.legalizeInstrStep(*WorkList[Idx]);
-        // Error out if we couldn't legalize this instruction. We may want to
-        // fall back to DAG ISel instead in the future.
-        if (Res == LegalizerHelper::UnableToLegalize) {
-          Helper.MIRBuilder.stopRecordingInsertions();
-          if (Res == LegalizerHelper::UnableToLegalize) {
-            reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
-                               "unable to legalize instruction",
-                               *WorkList[Idx]);
-            return false;
-          }
-        }
-        Changed |= Res == LegalizerHelper::Legalized;
-        ++Idx;
-
-#ifndef NDEBUG
-        if (NumNewInsns) {
-          DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n");
-          for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end();
-               I != E; ++I)
-            DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs()));
-          NumNewInsns = 0;
-        }
-#endif
-      } while (Idx < WorkList.size());
-
-      Helper.MIRBuilder.stopRecordingInsertions();
+      if (isArtifact(MI))
+        ArtifactList.insert(&MI);
+      else
+        InstList.insert(&MI);
     }
   }
-
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-  for (auto &MBB : MF) {
-    for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
-      // Get the next Instruction before we try to legalize, because there's a
-      // good chance MI will be deleted.
-      NextMI = std::next(MI);
-      Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder);
+  Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
+    // Only legalize pre-isel generic instructions.
+    // Legalization process could generate Target specific pseudo
+    // instructions with generic types. Don't record them
+    if (isPreISelGenericOpcode(MI->getOpcode())) {
+      if (isArtifact(*MI))
+        ArtifactList.insert(MI);
+      else
+        InstList.insert(MI);
     }
+    DEBUG(dbgs() << ".. .. New MI: " << *MI;);
+  });
+  const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
+  LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo);
+  auto RemoveDeadInstFromLists = [&InstList,
+                                  &ArtifactList](MachineInstr *DeadMI) {
+    InstList.remove(DeadMI);
+    ArtifactList.remove(DeadMI);
+  };
+  bool Changed = false;
+  do {
+    while (!InstList.empty()) {
+      MachineInstr &MI = *InstList.pop_back_val();
+      assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+      if (isTriviallyDead(MI, MRI)) {
+        DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+        MI.eraseFromParentAndMarkDBGValuesForRemoval();
+        continue;
+      }
+
+      // Do the legalization for this instruction.
+      auto Res = Helper.legalizeInstrStep(MI);
+      // Error out if we couldn't legalize this instruction. We may want to
+      // fall back to DAG ISel instead in the future.
+      if (Res == LegalizerHelper::UnableToLegalize) {
+        Helper.MIRBuilder.stopRecordingInsertions();
+        reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+                           "unable to legalize instruction", MI);
+        return false;
+      }
+      Changed |= Res == LegalizerHelper::Legalized;
+    }
+    while (!ArtifactList.empty()) {
+      MachineInstr &MI = *ArtifactList.pop_back_val();
+      assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+      if (isTriviallyDead(MI, MRI)) {
+        DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+        RemoveDeadInstFromLists(&MI);
+        MI.eraseFromParentAndMarkDBGValuesForRemoval();
+        continue;
+      }
+      SmallVector<MachineInstr *, 4> DeadInstructions;
+      if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
+        for (auto *DeadMI : DeadInstructions) {
+          DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);
+          RemoveDeadInstFromLists(DeadMI);
+          DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
+        }
+        Changed = true;
+        continue;
+      }
+      // If this was not an artifact (that could be combined away), this might
+      // need special handling. Add it to InstList, so when it's processed
+      // there, it has to be legal or specially handled.
+      else
+        InstList.insert(&MI);
+    }
+  } while (!InstList.empty());
+
+  // For now don't support if new blocks are inserted - we would need to fix the
+  // outerloop for that.
+  if (MF.size() != NumBlocks) {
+    MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
+                                      MF.getFunction().getSubprogram(),
+                                      /*MBB=*/nullptr);
+    R << "inserting blocks is not supported yet";
+    reportGISelFailure(MF, TPC, MORE, R);
+    return false;
   }
 
   return Changed;
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 5258370e6680..87a658be4c29 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -17,12 +17,11 @@
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
-#include <sstream>
 
 #define DEBUG_TYPE "legalizer"
 
@@ -91,6 +90,15 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   case TargetOpcode::G_FADD:
     assert((Size == 32 || Size == 64) && "Unsupported size");
     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
+  case TargetOpcode::G_FSUB:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
+  case TargetOpcode::G_FMUL:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
+  case TargetOpcode::G_FDIV:
+    assert((Size == 32 || Size == 64) && "Unsupported size");
+    return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
   case TargetOpcode::G_FREM:
     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
   case TargetOpcode::G_FPOW:
@@ -128,7 +136,7 @@ LegalizerHelper::LegalizeResult
 LegalizerHelper::libcall(MachineInstr &MI) {
   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
   unsigned Size = LLTy.getSizeInBits();
-  auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
 
   MIRBuilder.setInstr(MI);
 
@@ -146,6 +154,9 @@ LegalizerHelper::libcall(MachineInstr &MI) {
     break;
   }
   case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FDIV:
   case TargetOpcode::G_FPOW:
   case TargetOpcode::G_FREM: {
     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
@@ -169,12 +180,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
 
   MIRBuilder.setInstr(MI);
 
+  int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+  int64_t NarrowSize = NarrowTy.getSizeInBits();
+
   switch (MI.getOpcode()) {
   default:
     return UnableToLegalize;
   case TargetOpcode::G_IMPLICIT_DEF: {
-    int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
-                   NarrowTy.getSizeInBits();
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp0 / NarrowSize;
 
     SmallVector<unsigned, 2> DstRegs;
     for (int i = 0; i < NumParts; ++i) {
@@ -187,9 +204,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_ADD: {
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
     // Expand in terms of carry-setting/consuming G_ADDE instructions.
-    int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
-                   NarrowTy.getSizeInBits();
+    int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
 
     SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
@@ -217,9 +237,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     if (TypeIdx != 1)
       return UnableToLegalize;
 
-    int64_t NarrowSize = NarrowTy.getSizeInBits();
-    int NumParts =
-        MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize;
+    int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    // FIXME: add support for when SizeOp1 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp1 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp1 / NarrowSize;
 
     SmallVector<unsigned, 2> SrcRegs, DstRegs;
     SmallVector<uint64_t, 2> Indexes;
@@ -266,12 +289,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_INSERT: {
-    if (TypeIdx != 0)
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
       return UnableToLegalize;
 
-    int64_t NarrowSize = NarrowTy.getSizeInBits();
-    int NumParts =
-        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+    int NumParts = SizeOp0 / NarrowSize;
 
     SmallVector<unsigned, 2> SrcRegs, DstRegs;
     SmallVector<uint64_t, 2> Indexes;
@@ -326,9 +349,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_LOAD: {
-    unsigned NarrowSize = NarrowTy.getSizeInBits();
-    int NumParts =
-        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp0 / NarrowSize;
     LLT OffsetTy = LLT::scalar(
         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
 
@@ -353,9 +378,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_STORE: {
-    unsigned NarrowSize = NarrowTy.getSizeInBits();
-    int NumParts =
-        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp0 / NarrowSize;
     LLT OffsetTy = LLT::scalar(
         MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
 
@@ -377,11 +404,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_CONSTANT: {
-    unsigned NarrowSize = NarrowTy.getSizeInBits();
-    int NumParts =
-        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp0 / NarrowSize;
     const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
-    LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
 
     SmallVector<unsigned, 2> DstRegs;
     for (int i = 0; i < NumParts; ++i) {
@@ -396,6 +425,53 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_OR: {
+    // Legalize bitwise operation:
+    // A = BinOp<Ty> B, C
+    // into:
+    // B1, ..., BN = G_UNMERGE_VALUES B
+    // C1, ..., CN = G_UNMERGE_VALUES C
+    // A1 = BinOp<Ty/N> B1, C2
+    // ...
+    // AN = BinOp<Ty/N> BN, CN
+    // A = G_MERGE_VALUES A1, ..., AN
+
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp0 / NarrowSize;
+
+    // List the registers where the destination will be scattered.
+    SmallVector<unsigned, 2> DstRegs;
+    // List the registers where the first argument will be split.
+    SmallVector<unsigned, 2> SrcsReg1;
+    // List the registers where the second argument will be split.
+    SmallVector<unsigned, 2> SrcsReg2;
+    // Create all the temporary registers.
+    for (int i = 0; i < NumParts; ++i) {
+      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
+      unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
+
+      DstRegs.push_back(DstReg);
+      SrcsReg1.push_back(SrcReg1);
+      SrcsReg2.push_back(SrcReg2);
+    }
+    // Explode the big arguments into smaller chunks.
+    MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
+    MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
+
+    // Do the operation on each small part.
+    for (int i = 0; i < NumParts; ++i)
+      MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]);
+
+    // Gather the destination registers into the final destination.
+    unsigned DstReg = MI.getOperand(0).getReg();
+    MIRBuilder.buildMerge(DstReg, DstRegs);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   }
 }
 
@@ -597,22 +673,58 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_FCMP: {
+    unsigned Op0Ext, Op1Ext, DstReg;
+    unsigned Cmp1 = MI.getOperand(2).getReg();
+    unsigned Cmp2 = MI.getOperand(3).getReg();
+    if (TypeIdx == 0) {
+      Op0Ext = Cmp1;
+      Op1Ext = Cmp2;
+      DstReg = MRI.createGenericVirtualRegister(WideTy);
+    } else {
+      Op0Ext = MRI.createGenericVirtualRegister(WideTy);
+      Op1Ext = MRI.createGenericVirtualRegister(WideTy);
+      DstReg = MI.getOperand(0).getReg();
+      MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op0Ext, Cmp1);
+      MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op1Ext, Cmp2);
+    }
+    MIRBuilder.buildFCmp(
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
+        DstReg, Op0Ext, Op1Ext);
+    if (TypeIdx == 0)
+      MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(),
+                            DstReg);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_ICMP: {
-    assert(TypeIdx == 1 && "unable to legalize predicate");
     bool IsSigned = CmpInst::isSigned(
         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()));
-    unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy);
-    unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy);
-    if (IsSigned) {
-      MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg());
-      MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg());
+    unsigned Cmp1 = MI.getOperand(2).getReg();
+    unsigned Cmp2 = MI.getOperand(3).getReg();
+    unsigned Op0Ext, Op1Ext, DstReg;
+    if (TypeIdx == 0) {
+      Op0Ext = Cmp1;
+      Op1Ext = Cmp2;
+      DstReg = MRI.createGenericVirtualRegister(WideTy);
     } else {
-      MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg());
-      MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg());
+      Op0Ext = MRI.createGenericVirtualRegister(WideTy);
+      Op1Ext = MRI.createGenericVirtualRegister(WideTy);
+      DstReg = MI.getOperand(0).getReg();
+      if (IsSigned) {
+        MIRBuilder.buildSExt(Op0Ext, Cmp1);
+        MIRBuilder.buildSExt(Op1Ext, Cmp2);
+      } else {
+        MIRBuilder.buildZExt(Op0Ext, Cmp1);
+        MIRBuilder.buildZExt(Op1Ext, Cmp2);
+      }
     }
     MIRBuilder.buildICmp(
         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
-        MI.getOperand(0).getReg(), Op0Ext, Op1Ext);
+        DstReg, Op0Ext, Op1Ext);
+    if (TypeIdx == 0)
+      MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(),
+                            DstReg);
     MI.eraseFromParent();
     return Legalized;
   }
@@ -623,6 +735,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     MI.getOperand(2).setReg(OffsetExt);
     return Legalized;
   }
+  case TargetOpcode::G_PHI: {
+    assert(TypeIdx == 0 && "Expecting only Idx 0");
+    auto getExtendedReg = [&](unsigned Reg, MachineBasicBlock &MBB) {
+      auto FirstTermIt = MBB.getFirstTerminator();
+      MIRBuilder.setInsertPt(MBB, FirstTermIt);
+      MachineInstr *DefMI = MRI.getVRegDef(Reg);
+      MachineInstrBuilder MIB;
+      if (DefMI->getOpcode() == TargetOpcode::G_TRUNC)
+        MIB = MIRBuilder.buildAnyExtOrTrunc(WideTy,
+                                            DefMI->getOperand(1).getReg());
+      else
+        MIB = MIRBuilder.buildAnyExt(WideTy, Reg);
+      return MIB->getOperand(0).getReg();
+    };
+    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, WideTy);
+    for (auto OpIt = MI.operands_begin() + 1, OpE = MI.operands_end();
+         OpIt != OpE;) {
+      unsigned Reg = OpIt++->getReg();
+      MachineBasicBlock *OpMBB = OpIt++->getMBB();
+      MIB.addReg(getExtendedReg(Reg, *OpMBB));
+      MIB.addMBB(OpMBB);
+    }
+    auto *MBB = MI.getParent();
+    MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
+    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(),
+                          MIB->getOperand(0).getReg());
+    MI.eraseFromParent();
+    return Legalized;
+  }
   }
 }
 
@@ -683,7 +824,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
       return UnableToLegalize;
     unsigned Res = MI.getOperand(0).getReg();
     Type *ZeroTy;
-    LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
     switch (Ty.getSizeInBits()) {
     case 16:
       ZeroTy = Type::getHalfTy(Ctx);
@@ -726,6 +867,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+    unsigned OldValRes = MI.getOperand(0).getReg();
+    unsigned SuccessRes = MI.getOperand(1).getReg();
+    unsigned Addr = MI.getOperand(2).getReg();
+    unsigned CmpVal = MI.getOperand(3).getReg();
+    unsigned NewVal = MI.getOperand(4).getReg();
+    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
+                                  **MI.memoperands_begin());
+    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   }
 }
 
@@ -741,7 +894,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_ADD: {
     unsigned NarrowSize = NarrowTy.getSizeInBits();
     unsigned DstReg = MI.getOperand(0).getReg();
-    int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize;
+    unsigned Size = MRI.getType(DstReg).getSizeInBits();
+    int NumParts = Size / NarrowSize;
+    // FIXME: Don't know how to handle the situation where the small vectors
+    // aren't all the same size yet.
+    if (Size % NarrowSize != 0)
+      return UnableToLegalize;
 
     MIRBuilder.setInstr(MI);
 
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 76917aa9660d..9c27c59a0654 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -22,51 +22,136 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LowLevelTypeImpl.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOpcodes.h"
 #include <algorithm>
-#include <cassert>
-#include <tuple>
-#include <utility>
-
+#include <map>
 using namespace llvm;
 
-LegalizerInfo::LegalizerInfo() {
-  DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar;
-
-  // FIXME: these two can be legalized to the fundamental load/store Jakob
-  // proposed. Once loads & stores are supported.
-  DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
-  DefaultActions[TargetOpcode::G_TRUNC] = Legal;
+LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+  // Set defaults.
+  // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
+  // fundamental load/store Jakob proposed. Once loads & stores are supported.
+  setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}});
+  setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}});
+  setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}});
+  setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}});
+  setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}});
 
-  DefaultActions[TargetOpcode::G_INTRINSIC] = Legal;
-  DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal;
+  setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
+  setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
 
-  DefaultActions[TargetOpcode::G_ADD] = NarrowScalar;
-  DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar;
-  DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall);
 
-  DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
-  DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
-  DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar;
-  DefaultActions[TargetOpcode::G_FNEG] = Lower;
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall);
+  setLegalizeScalarToDifferentSizeStrategy(
+      TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall);
+  setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}});
 }
 
 void LegalizerInfo::computeTables() {
-  for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) {
-    for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) {
-      for (auto &Action : Actions[Opcode][Idx]) {
-        LLT Ty = Action.first;
-        if (!Ty.isVector())
-          continue;
-
-        auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp,
-                                                        Ty.getElementType())];
-        Entry = std::max(Entry, Ty.getNumElements());
+  assert(TablesInitialized == false);
+
+  for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) {
+    const unsigned Opcode = FirstOp + OpcodeIdx;
+    for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size();
+         ++TypeIdx) {
+      // 0. Collect information specified through the setAction API, i.e.
+      // for specific bit sizes.
+      // For scalar types:
+      SizeAndActionsVec ScalarSpecifiedActions;
+      // For pointer types:
+      std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
+      // For vector types:
+      std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
+      for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
+        const LLT Type = LLT2Action.first;
+        const LegalizeAction Action = LLT2Action.second;
+
+        auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
+        if (Type.isPointer())
+          AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
+              SizeAction);
+        else if (Type.isVector())
+          ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
+              .push_back(SizeAction);
+        else
+          ScalarSpecifiedActions.push_back(SizeAction);
+      }
+
+      // 1. Handle scalar types
+      {
+        // Decide how to handle bit sizes for which no explicit specification
+        // was given.
+        SizeChangeStrategy S = &unsupportedForDifferentSizes;
+        if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
+            ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+          S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
+        std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end());
+        checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
+        setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
       }
+
+      // 2. Handle pointer types
+      for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
+        std::sort(PointerSpecifiedActions.second.begin(),
+                  PointerSpecifiedActions.second.end());
+        checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
+        // For pointer types, we assume that there isn't a meaningfull way
+        // to change the number of bits used in the pointer.
+        setPointerAction(
+            Opcode, TypeIdx, PointerSpecifiedActions.first,
+            unsupportedForDifferentSizes(PointerSpecifiedActions.second));
+      }
+
+      // 3. Handle vector types
+      SizeAndActionsVec ElementSizesSeen;
+      for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
+        std::sort(VectorSpecifiedActions.second.begin(),
+                  VectorSpecifiedActions.second.end());
+        const uint16_t ElementSize = VectorSpecifiedActions.first;
+        ElementSizesSeen.push_back({ElementSize, Legal});
+        checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
+        // For vector types, we assume that the best way to adapt the number
+        // of elements is to the next larger number of elements type for which
+        // the vector type is legal, unless there is no such type. In that case,
+        // legalize towards a vector type with a smaller number of elements.
+        SizeAndActionsVec NumElementsActions;
+        for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
+          assert(BitsizeAndAction.first % ElementSize == 0);
+          const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
+          NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
+        }
+        setVectorNumElementAction(
+            Opcode, TypeIdx, ElementSize,
+            moreToWiderTypesAndLessToWidest(NumElementsActions));
+      }
+      std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+      SizeChangeStrategy VectorElementSizeChangeStrategy =
+          &unsupportedForDifferentSizes;
+      if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
+          VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+        VectorElementSizeChangeStrategy =
+            VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
+      setScalarInVectorAction(
+          Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
     }
   }
 
@@ -82,62 +167,23 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
   assert(TablesInitialized && "backend forgot to call computeTables");
   // These *have* to be implemented for now, they're the fundamental basis of
   // how everything else is transformed.
+  if (Aspect.Type.isScalar() || Aspect.Type.isPointer())
+    return findScalarLegalAction(Aspect);
+  assert(Aspect.Type.isVector());
+  return findVectorLegalAction(Aspect);
+}
 
-  // FIXME: the long-term plan calls for expansion in terms of load/store (if
-  // they're not legal).
-  if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
-      Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
-    return std::make_pair(Legal, Aspect.Type);
-
-  LLT Ty = Aspect.Type;
-  LegalizeAction Action = findInActions(Aspect);
-  // LegalizerHelper is not able to handle non-power-of-2 types right now, so do
-  // not try to legalize them unless they are marked as Legal or Custom.
-  // FIXME: This is a temporary hack until the general non-power-of-2
-  // legalization works.
-  if (!isPowerOf2_64(Ty.getSizeInBits()) &&
-      !(Action == Legal || Action == Custom))
-    return std::make_pair(Unsupported, LLT());
-
-  if (Action != NotFound)
-    return findLegalAction(Aspect, Action);
-
-  unsigned Opcode = Aspect.Opcode;
-  if (!Ty.isVector()) {
-    auto DefaultAction = DefaultActions.find(Aspect.Opcode);
-    if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
-      return std::make_pair(Legal, Ty);
-
-    if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower)
-      return std::make_pair(Lower, Ty);
-
-    if (DefaultAction == DefaultActions.end() ||
-        DefaultAction->second != NarrowScalar)
-      return std::make_pair(Unsupported, LLT());
-    return findLegalAction(Aspect, NarrowScalar);
-  }
-
-  LLT EltTy = Ty.getElementType();
-  int NumElts = Ty.getNumElements();
-
-  auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy));
-  if (ScalarAction != ScalarInVectorActions.end() &&
-      ScalarAction->second != Legal)
-    return findLegalAction(Aspect, ScalarAction->second);
-
-  // The element type is legal in principle, but the number of elements is
-  // wrong.
-  auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy));
-  if (MaxLegalElts > NumElts)
-    return findLegalAction(Aspect, MoreElements);
-
-  if (MaxLegalElts == 0) {
-    // Scalarize if there's no legal vector type, which is just a special case
-    // of FewerElements.
-    return std::make_pair(FewerElements, EltTy);
-  }
-
-  return findLegalAction(Aspect, FewerElements);
+/// Helper function to get LLT for the given type index.
+static LLT getTypeFromTypeIdx(const MachineInstr &MI,
+                              const MachineRegisterInfo &MRI, unsigned OpIdx,
+                              unsigned TypeIdx) {
+  assert(TypeIdx < MI.getNumOperands() && "Unexpected TypeIdx");
+  // G_UNMERGE_VALUES has variable number of operands, but there is only
+  // one source type and one destination type as all destinations must be the
+  // same type. So, get the last operand if TypeIdx == 1.
+  if (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && TypeIdx == 1)
+    return MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
+  return MRI.getType(MI.getOperand(OpIdx).getReg());
 }
 
 std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT>
@@ -145,19 +191,20 @@ LegalizerInfo::getAction(const MachineInstr &MI,
                          const MachineRegisterInfo &MRI) const {
   SmallBitVector SeenTypes(8);
   const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
+  // FIXME: probably we'll need to cache the results here somehow?
   for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
     if (!OpInfo[i].isGenericType())
       continue;
 
-    // We don't want to repeatedly check the same operand index, that
-    // could get expensive.
+    // We must only record actions once for each TypeIdx; otherwise we'd
+    // try to legalize operands multiple times down the line.
     unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
     if (SeenTypes[TypeIdx])
       continue;
 
     SeenTypes.set(TypeIdx);
 
-    LLT Ty = MRI.getType(MI.getOperand(i).getReg());
+    LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx);
     auto Action = getAction({MI.getOpcode(), TypeIdx, Ty});
     if (Action.first != Legal)
       return std::make_tuple(Action.first, TypeIdx, Action.second);
@@ -170,38 +217,166 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
   return std::get<0>(getAction(MI, MRI)) == Legal;
 }
 
-Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
-                                 LegalizeAction Action) const {
-  switch(Action) {
-  default:
-    llvm_unreachable("Cannot find legal type");
+bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                   MachineIRBuilder &MIRBuilder) const {
+  return false;
+}
+
+LegalizerInfo::SizeAndActionsVec
+LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
+    const SizeAndActionsVec &v, LegalizeAction IncreaseAction,
+    LegalizeAction DecreaseAction) {
+  SizeAndActionsVec result;
+  unsigned LargestSizeSoFar = 0;
+  if (v.size() >= 1 && v[0].first != 1)
+    result.push_back({1, IncreaseAction});
+  for (size_t i = 0; i < v.size(); ++i) {
+    result.push_back(v[i]);
+    LargestSizeSoFar = v[i].first;
+    if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) {
+      result.push_back({LargestSizeSoFar + 1, IncreaseAction});
+      LargestSizeSoFar = v[i].first + 1;
+    }
+  }
+  result.push_back({LargestSizeSoFar + 1, DecreaseAction});
+  return result;
+}
+
+LegalizerInfo::SizeAndActionsVec
+LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest(
+    const SizeAndActionsVec &v, LegalizeAction DecreaseAction,
+    LegalizeAction IncreaseAction) {
+  SizeAndActionsVec result;
+  if (v.size() == 0 || v[0].first != 1)
+    result.push_back({1, IncreaseAction});
+  for (size_t i = 0; i < v.size(); ++i) {
+    result.push_back(v[i]);
+    if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) {
+      result.push_back({v[i].first + 1, DecreaseAction});
+    }
+  }
+  return result;
+}
+
+LegalizerInfo::SizeAndAction
+LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
+  assert(Size >= 1);
+  // Find the last element in Vec that has a bitsize equal to or smaller than
+  // the requested bit size.
+  // That is the element just before the first element that is bigger than Size.
+  auto VecIt = std::upper_bound(
+      Vec.begin(), Vec.end(), Size,
+      [](const uint32_t Size, const SizeAndAction lhs) -> bool {
+        return Size < lhs.first;
+      });
+  assert(VecIt != Vec.begin() && "Does Vec not start with size 1?");
+  --VecIt;
+  int VecIdx = VecIt - Vec.begin();
+
+  LegalizeAction Action = Vec[VecIdx].second;
+  switch (Action) {
   case Legal:
   case Lower:
   case Libcall:
   case Custom:
-    return Aspect.Type;
+    return {Size, Action};
+  case FewerElements:
+    // FIXME: is this special case still needed and correct?
+    // Special case for scalarization:
+    if (Vec == SizeAndActionsVec({{1, FewerElements}}))
+      return {1, FewerElements};
+    LLVM_FALLTHROUGH;
   case NarrowScalar: {
-    return findLegalizableSize(
-        Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
-  }
-  case WidenScalar: {
-    return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT {
-      return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
-    });
-  }
-  case FewerElements: {
-    return findLegalizableSize(
-        Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); });
+    // The following needs to be a loop, as for now, we do allow needing to
+    // go over "Unsupported" bit sizes before finding a legalizable bit size.
+    // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8,
+    // we need to iterate over s9, and then to s32 to return (s32, Legal).
+    // If we want to get rid of the below loop, we should have stronger asserts
+    // when building the SizeAndActionsVecs, probably not allowing
+    // "Unsupported" unless at the ends of the vector.
+    for (int i = VecIdx - 1; i >= 0; --i)
+      if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+          Vec[i].second != Unsupported)
+        return {Vec[i].first, Action};
+    llvm_unreachable("");
   }
+  case WidenScalar:
   case MoreElements: {
-    return findLegalizableSize(
-        Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
+    // See above, the following needs to be a loop, at least for now.
+    for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i)
+      if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+          Vec[i].second != Unsupported)
+        return {Vec[i].first, Action};
+    llvm_unreachable("");
   }
+  case Unsupported:
+    return {Size, Unsupported};
+  case NotFound:
+    llvm_unreachable("NotFound");
   }
+  llvm_unreachable("Action has an unknown enum value");
 }
 
-bool LegalizerInfo::legalizeCustom(MachineInstr &MI,
-                                   MachineRegisterInfo &MRI,
-                                   MachineIRBuilder &MIRBuilder) const {
-  return false;
+std::pair<LegalizerInfo::LegalizeAction, LLT>
+LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
+  assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
+  if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+    return {NotFound, LLT()};
+  const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
+  if (Aspect.Type.isPointer() &&
+      AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
+          AddrSpace2PointerActions[OpcodeIdx].end()) {
+    return {NotFound, LLT()};
+  }
+  const SmallVector<SizeAndActionsVec, 1> &Actions =
+      Aspect.Type.isPointer()
+          ? AddrSpace2PointerActions[OpcodeIdx]
+                .find(Aspect.Type.getAddressSpace())
+                ->second
+          : ScalarActions[OpcodeIdx];
+  if (Aspect.Idx >= Actions.size())
+    return {NotFound, LLT()};
+  const SizeAndActionsVec &Vec = Actions[Aspect.Idx];
+  // FIXME: speed up this search, e.g. by using a results cache for repeated
+  // queries?
+  auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits());
+  return {SizeAndAction.second,
+          Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first)
+                                 : LLT::pointer(Aspect.Type.getAddressSpace(),
+                                                SizeAndAction.first)};
+}
+
+std::pair<LegalizerInfo::LegalizeAction, LLT>
+LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
+  assert(Aspect.Type.isVector());
+  // First legalize the vector element size, then legalize the number of
+  // lanes in the vector.
+  if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+    return {NotFound, Aspect.Type};
+  const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
+  const unsigned TypeIdx = Aspect.Idx;
+  if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
+    return {NotFound, Aspect.Type};
+  const SizeAndActionsVec &ElemSizeVec =
+      ScalarInVectorActions[OpcodeIdx][TypeIdx];
+
+  LLT IntermediateType;
+  auto ElementSizeAndAction =
+      findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits());
+  IntermediateType =
+      LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first);
+  if (ElementSizeAndAction.second != Legal)
+    return {ElementSizeAndAction.second, IntermediateType};
+
+  auto i = NumElements2Actions[OpcodeIdx].find(
+      IntermediateType.getScalarSizeInBits());
+  if (i == NumElements2Actions[OpcodeIdx].end()) {
+    return {NotFound, IntermediateType};
+  }
+  const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx];
+  auto NumElementsAndAction =
+      findAction(NumElementsVec, IntermediateType.getNumElements());
+  return {NumElementsAndAction.second,
+          LLT::vector(NumElementsAndAction.first,
+                      IntermediateType.getScalarSizeInBits())};
 }
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index c5d0999fe438..8e16470b6f90 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -101,7 +101,8 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
           // Don't try to be smart for the insertion point.
           // There is no guarantee that the first seen use is the first
           // use in the block.
-          InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI);
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+                            LocalizedMI);
 
           // Set a new register for the definition.
           unsigned NewReg =
@@ -112,7 +113,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
               MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
           DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
         }
-        DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second)
+        DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
                      << '\n');
         // Update the user reg.
         MOUse.setReg(NewVRegIt->second);
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 4636806c3f08..475bb82e5b9c 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -15,10 +15,10 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -83,30 +83,26 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
   return MIB;
 }
 
-MachineInstrBuilder MachineIRBuilder::buildDirectDbgValue(
-    unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+                                      const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
   assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  return buildInstr(TargetOpcode::DBG_VALUE)
-      .addReg(Reg, RegState::Debug)
-      .addReg(0, RegState::Debug)
-      .addMetadata(Variable)
-      .addMetadata(Expr);
+  return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE),
+                             /*IsIndirect*/ false, Reg, Variable, Expr));
 }
 
-MachineInstrBuilder MachineIRBuilder::buildIndirectDbgValue(
-    unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr) {
+MachineInstrBuilder
+MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+                                        const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
   assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  return buildInstr(TargetOpcode::DBG_VALUE)
-      .addReg(Reg, RegState::Debug)
-      .addImm(Offset)
-      .addMetadata(Variable)
-      .addMetadata(Expr);
+  return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE),
+                             /*IsIndirect*/ true, Reg, Variable, Expr));
 }
 
 MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
@@ -124,7 +120,6 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
 }
 
 MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
-                                                         unsigned Offset,
                                                          const MDNode *Variable,
                                                          const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
@@ -144,7 +139,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
     MIB.addReg(0U);
   }
 
-  return MIB.addImm(Offset).addMetadata(Variable).addMetadata(Expr);
+  return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
@@ -268,7 +263,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
 
   const ConstantInt *NewVal = &Val;
   if (Ty.getSizeInBits() != Val.getBitWidth())
-    NewVal = ConstantInt::get(MF->getFunction()->getContext(),
+    NewVal = ConstantInt::get(MF->getFunction().getContext(),
                               Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
 
   return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
@@ -276,7 +271,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
 
 MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
                                                     int64_t Val) {
-  auto IntN = IntegerType::get(MF->getFunction()->getContext(),
+  auto IntN = IntegerType::get(MF->getFunction().getContext(),
                                MRI->getType(Res).getSizeInBits());
   ConstantInt *CI = ConstantInt::get(IntN, Val, true);
   return buildConstant(Res, *CI);
@@ -351,14 +346,17 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
   return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
-                                                       unsigned Op) {
+MachineInstrBuilder
+MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, unsigned Res, unsigned Op) {
+  assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||
+          TargetOpcode::G_SEXT == ExtOpc) &&
+         "Expecting Extending Opc");
   assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
   assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
 
   unsigned Opcode = TargetOpcode::COPY;
   if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
-    Opcode = TargetOpcode::G_SEXT;
+    Opcode = ExtOpc;
   else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
     Opcode = TargetOpcode::G_TRUNC;
   else
@@ -367,20 +365,19 @@ MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
   return buildInstr(Opcode).addDef(Res).addUse(Op);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
                                                        unsigned Op) {
-  assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
-  assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+  return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);
+}
 
-  unsigned Opcode = TargetOpcode::COPY;
-  if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
-    Opcode = TargetOpcode::G_ZEXT;
-  else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
-    Opcode = TargetOpcode::G_TRUNC;
-  else
-    assert(MRI->getType(Res) == MRI->getType(Op));
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+                                                       unsigned Op) {
+  return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);
+}
 
-  return buildInstr(Opcode).addDef(Res).addUse(Op);
+MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(unsigned Res,
+                                                         unsigned Op) {
+  return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
@@ -661,6 +658,31 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
       .addUse(Idx);
 }
 
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
+                                     unsigned CmpVal, unsigned NewVal,
+                                     MachineMemOperand &MMO) {
+#ifndef NDEBUG
+  LLT OldValResTy = MRI->getType(OldValRes);
+  LLT AddrTy = MRI->getType(Addr);
+  LLT CmpValTy = MRI->getType(CmpVal);
+  LLT NewValTy = MRI->getType(NewVal);
+  assert(OldValResTy.isScalar() && "invalid operand type");
+  assert(AddrTy.isPointer() && "invalid operand type");
+  assert(CmpValTy.isValid() && "invalid operand type");
+  assert(NewValTy.isValid() && "invalid operand type");
+  assert(OldValResTy == CmpValTy && "type mismatch");
+  assert(OldValResTy == NewValTy && "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG)
+      .addDef(OldValRes)
+      .addUse(Addr)
+      .addUse(CmpVal)
+      .addUse(NewVal)
+      .addMemOperand(&MMO);
+}
+
 void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
                                         bool IsExtend) {
 #ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 677941dbbf6d..006c9ea23034 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -26,9 +26,12 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/CommandLine.h"
@@ -36,9 +39,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -159,7 +159,7 @@ bool RegBankSelect::repairReg(
   // same types because the type is a placeholder when this function is called.
   MachineInstr *MI =
       MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
-  DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
+  DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
                << '\n');
   // TODO:
   // Check if MI is legal. if not, we need to legalize all the
@@ -221,9 +221,8 @@ uint64_t RegBankSelect::getRepairCost(
     // into a new virtual register.
     // We would also need to propagate this information in the
     // repairing placement.
-    unsigned Cost =
-        RBI->copyCost(*DesiredRegBrank, *CurRegBank,
-                      RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
+    unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank,
+                                  RBI->getSizeInBits(MO.getReg(), *MRI, *TRI));
     // TODO: use a dedicated constant for ImpossibleCost.
     if (Cost != std::numeric_limits<unsigned>::max())
       return Cost;
@@ -602,9 +601,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
-  const Function *F = MF.getFunction();
+  const Function &F = MF.getFunction();
   Mode SaveOptMode = OptMode;
-  if (F->hasFnAttribute(Attribute::OptimizeNone))
+  if (F.hasFnAttribute(Attribute::OptimizeNone))
     OptMode = Mode::Fast;
   init(MF);
 
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 83b21e637097..4d3ae69d3a9d 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -11,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 
 #define DEBUG_TYPE "registerbank"
 
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index a841902feed1..b3d9209ae6eb 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -19,13 +19,12 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 #include <algorithm> // For std::max.
 
@@ -84,7 +83,7 @@ const RegisterBank *
 RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
                              const TargetRegisterInfo &TRI) const {
   if (TargetRegisterInfo::isPhysicalRegister(Reg))
-    return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg));
+    return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
 
   assert(Reg && "NoRegister does not have a register bank");
   const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -95,6 +94,19 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
   return nullptr;
 }
 
+const TargetRegisterClass &
+RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg,
+                                         const TargetRegisterInfo &TRI) const {
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         "Reg must be a physreg");
+  const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
+  if (RegRCIt != PhysRegMinimalRCs.end())
+    return *RegRCIt->second;
+  const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+  PhysRegMinimalRCs[Reg] = PhysRC;
+  return *PhysRC;
+}
+
 const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
     const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
     const TargetRegisterInfo &TRI) const {
@@ -151,7 +163,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
   // is important. The rest is not constrained.
   unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
 
-  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineFunction &MF = *MI.getMF();
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -419,16 +431,20 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
     }
     unsigned OrigReg = MO.getReg();
     unsigned NewReg = *NewRegs.begin();
-    DEBUG(dbgs() << " changed, replace " << PrintReg(OrigReg, nullptr));
+    DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
     MO.setReg(NewReg);
-    DEBUG(dbgs() << " with " << PrintReg(NewReg, nullptr));
+    DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
 
     // The OperandsMapper creates plain scalar, we may have to fix that.
     // Check if the types match and if not, fix that.
     LLT OrigTy = MRI.getType(OrigReg);
     LLT NewTy = MRI.getType(NewReg);
     if (OrigTy != NewTy) {
-      assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
+      // The default mapping is not supposed to change the size of
+      // the storage. However, right now we don't necessarily bump all
+      // the types to storage size. For instance, we can consider
+      // s16 G_AND legal whereas the storage size is going to be 32.
+      assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
              "Types with difference size cannot be handled by the default "
              "mapping");
       DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
@@ -441,13 +457,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
 
 unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
                                          const MachineRegisterInfo &MRI,
-                                         const TargetRegisterInfo &TRI) {
+                                         const TargetRegisterInfo &TRI) const {
   const TargetRegisterClass *RC = nullptr;
   if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
     // The size is not directly available for physical registers.
     // Instead, we need to access a register class that contains Reg and
     // get the size of that register class.
-    RC = TRI.getMinimalPhysRegClass(Reg);
+    RC = &getMinimalPhysRegClass(Reg, TRI);
   } else {
     LLT Ty = MRI.getType(Reg);
     unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
@@ -543,10 +559,11 @@ bool RegisterBankInfo::InstructionMapping::verify(
   // For PHI, we only care about mapping the definition.
   assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) &&
          "NumOperands must match, see constructor");
-  assert(MI.getParent() && MI.getParent()->getParent() &&
+  assert(MI.getParent() && MI.getMF() &&
          "MI must be connected to a MachineFunction");
-  const MachineFunction &MF = *MI.getParent()->getParent();
-  (void)MF;
+  const MachineFunction &MF = *MI.getMF();
+  const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
+  (void)RBI;
 
   for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
     const MachineOperand &MO = MI.getOperand(Idx);
@@ -564,7 +581,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
     (void)MOMapping;
     // Register size in bits.
     // This size must match what the mapping expects.
-    assert(MOMapping.verify(getSizeInBits(
+    assert(MOMapping.verify(RBI->getSizeInBits(
                Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
            "Value mapping is invalid");
   }
@@ -725,8 +742,8 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
   // If we have a function, we can pretty print the name of the registers.
   // Otherwise we will print the raw numbers.
   const TargetRegisterInfo *TRI =
-      getMI().getParent() && getMI().getParent()->getParent()
-          ? getMI().getParent()->getParent()->getSubtarget().getRegisterInfo()
+      getMI().getParent() && getMI().getMF()
+          ? getMI().getMF()->getSubtarget().getRegisterInfo()
           : nullptr;
   bool IsFirst = true;
   for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
@@ -735,13 +752,13 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
     if (!IsFirst)
       OS << ", ";
     IsFirst = false;
-    OS << '(' << PrintReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
+    OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
     bool IsFirstNewVReg = true;
     for (unsigned VReg : getVRegs(Idx)) {
       if (!IsFirstNewVReg)
         OS << ", ";
       IsFirstNewVReg = false;
-      OS << PrintReg(VReg, TRI);
+      OS << printReg(VReg, TRI);
     }
     OS << "])";
   }
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 5ecaf5c563f8..ef990b49aceb 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -17,10 +17,10 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 
 #define DEBUG_TYPE "globalisel-utils"
 
@@ -99,7 +99,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
                               const MachineInstr &MI) {
   MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
                                     MI.getDebugLoc(), MI.getParent());
-  R << Msg << ": " << ore::MNV("Inst", MI);
+  R << Msg;
+  // Printing MI is expensive;  only do it if expensive remarks are enabled.
+  if (MORE.allowExtraAnalysis(PassName))
+    R << ": " << ore::MNV("Inst", MI);
   reportGISelFailure(MF, TPC, MORE, R);
 }
 
@@ -126,3 +129,19 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
     return nullptr;
   return MI->getOperand(1).getFPImm();
 }
+
+llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
+                                       const MachineRegisterInfo &MRI) {
+  auto *DefMI = MRI.getVRegDef(Reg);
+  auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+  if (!DstTy.isValid())
+    return nullptr;
+  while (DefMI->getOpcode() == TargetOpcode::COPY) {
+    unsigned SrcReg = DefMI->getOperand(1).getReg();
+    auto SrcTy = MRI.getType(SrcReg);
+    if (!SrcTy.isValid() || SrcTy != DstTy)
+      break;
+    DefMI = MRI.getVRegDef(SrcReg);
+  }
+  return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+}
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index c6ca49ce24d7..8b9545da914e 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -1,4 +1,4 @@
-//===-- GlobalMerge.cpp - Internal globals merging  -----------------------===//
+//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,6 +6,7 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
 // This pass merges globals with internal linkage into one. This way all the
 // globals which were merged into a biggest one can be addressed using offsets
 // from the same base pointer (no need for separate base pointer for each of the
@@ -57,30 +58,45 @@
 // - it can increase register pressure when the uses are disparate enough.
 // 
 // We use heuristics to discover the best global grouping we can (cf cl::opts).
+//
 // ===---------------------------------------------------------------------===//
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "global-merge"
@@ -117,9 +133,12 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
      cl::desc("Enable global merge pass on external linkage"));
 
 STATISTIC(NumMerged, "Number of globals merged");
+
 namespace {
+
   class GlobalMerge : public FunctionPass {
-    const TargetMachine *TM;
+    const TargetMachine *TM = nullptr;
+
     // FIXME: Infer the maximum possible offset depending on the actual users
     // (these max offsets are different for the users inside Thumb or ARM
     // functions), see the code that passes in the offset in the ARM backend
@@ -130,15 +149,16 @@ namespace {
     /// Currently, this applies a dead simple heuristic: only consider globals
     /// used in minsize functions for merging.
     /// FIXME: This could learn about optsize, and be used in the cost model.
-    bool OnlyOptimizeForSize;
+    bool OnlyOptimizeForSize = false;
 
     /// Whether we should merge global variables that have external linkage.
-    bool MergeExternalGlobals;
+    bool MergeExternalGlobals = false;
 
     bool IsMachO;
 
     bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
                  Module &M, bool isConst, unsigned AddrSpace) const;
+
     /// \brief Merge everything in \p Globals for which the corresponding bit
     /// in \p GlobalSet is set.
     bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
@@ -164,9 +184,9 @@ namespace {
 
   public:
     static char ID;             // Pass identification, replacement for typeid.
+
     explicit GlobalMerge()
-        : FunctionPass(ID), TM(nullptr), MaxOffset(GlobalMergeMaxOffset),
-          OnlyOptimizeForSize(false), MergeExternalGlobals(false) {
+        : FunctionPass(ID), MaxOffset(GlobalMergeMaxOffset) {
       initializeGlobalMergePass(*PassRegistry::getPassRegistry());
     }
 
@@ -189,9 +209,11 @@ namespace {
       FunctionPass::getAnalysisUsage(AU);
     }
   };
+
 } // end anonymous namespace
 
 char GlobalMerge::ID = 0;
+
 INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
 
 bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
@@ -231,9 +253,10 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
 
   // We keep track of the sets of globals used together "close enough".
   struct UsedGlobalSet {
-    UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
     BitVector Globals;
-    unsigned UsageCount;
+    unsigned UsageCount = 1;
+
+    UsedGlobalSet(size_t Size) : Globals(Size) {}
   };
 
   // Each set is unique in UsedGlobalSets.
@@ -363,7 +386,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
   //
   // Multiply that by the size of the set to give us a crude profitability
   // metric.
-  std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
+  std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
             [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
               return UGS1.Globals.count() * UGS1.UsageCount <
                      UGS2.Globals.count() * UGS2.UsageCount;
@@ -545,7 +568,7 @@ bool GlobalMerge::doInitialization(Module &M) {
   IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
 
   auto &DL = M.getDataLayout();
-  DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+  DenseMap<unsigned, SmallVector<GlobalVariable *, 16>> Globals, ConstGlobals,
                                                         BSSGlobals;
   bool Changed = false;
   setMustKeepGlobalVariables(M);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ff8405366173..a22ce0dab9c2 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1,4 +1,4 @@
-//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//===- IfConversion.cpp - Machine code if conversion pass -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,26 +16,41 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iterator>
+#include <memory>
 #include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -77,6 +92,7 @@ STATISTIC(NumDupBBs,       "Number of duplicated blocks");
 STATISTIC(NumUnpred,       "Number of true blocks of diamonds unpredicated");
 
 namespace {
+
   class IfConverter : public MachineFunctionPass {
     enum IfcvtKind {
       ICNotClassfied,  // BB data valid, but not classified.
@@ -125,21 +141,20 @@ namespace {
       bool IsUnpredicable  : 1;
       bool CannotBeCopied  : 1;
       bool ClobbersPred    : 1;
-      unsigned NonPredSize;
-      unsigned ExtraCost;
-      unsigned ExtraCost2;
-      MachineBasicBlock *BB;
-      MachineBasicBlock *TrueBB;
-      MachineBasicBlock *FalseBB;
+      unsigned NonPredSize = 0;
+      unsigned ExtraCost = 0;
+      unsigned ExtraCost2 = 0;
+      MachineBasicBlock *BB = nullptr;
+      MachineBasicBlock *TrueBB = nullptr;
+      MachineBasicBlock *FalseBB = nullptr;
       SmallVector<MachineOperand, 4> BrCond;
       SmallVector<MachineOperand, 4> Predicate;
+
       BBInfo() : IsDone(false), IsBeingAnalyzed(false),
                  IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
                  IsBrReversible(false), HasFallThrough(false),
                  IsUnpredicable(false), CannotBeCopied(false),
-                 ClobbersPred(false), NonPredSize(0), ExtraCost(0),
-                 ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
-                 FalseBB(nullptr) {}
+                 ClobbersPred(false) {}
     };
 
     /// Record information about pending if-conversions to attempt:
@@ -161,6 +176,7 @@ namespace {
       bool NeedSubsumption : 1;
       bool TClobbersPred : 1;
       bool FClobbersPred : 1;
+
       IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
                  bool tc = false, bool fc = false)
         : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
@@ -179,17 +195,17 @@ namespace {
     MachineRegisterInfo *MRI;
 
     LivePhysRegs Redefs;
-    LivePhysRegs DontKill;
 
     bool PreRegAlloc;
     bool MadeChange;
-    int FnNum;
+    int FnNum = -1;
     std::function<bool(const MachineFunction &)> PredicateFtor;
 
   public:
     static char ID;
+
     IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr)
-        : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
+        : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
       initializeIfConverterPass(*PassRegistry::getPassRegistry());
     }
 
@@ -242,7 +258,6 @@ namespace {
     void AnalyzeBlocks(MachineFunction &MF,
                        std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
     void InvalidatePreds(MachineBasicBlock &MBB);
-    void RemoveExtraEdges(BBInfo &BBI);
     bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
     bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
     bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -311,8 +326,9 @@ namespace {
     }
   };
 
-  char IfConverter::ID = 0;
-}
+} // end anonymous namespace
+
+char IfConverter::ID = 0;
 
 char &llvm::IfConverterID = IfConverter::ID;
 
@@ -321,7 +337,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
 
 bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
+  if (skipFunction(MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
     return false;
 
   const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -390,12 +406,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
       case ICSimpleFalse: {
         bool isFalse = Kind == ICSimpleFalse;
         if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
-        DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
-                                            " false" : "")
-                     << "): BB#" << BBI.BB->getNumber() << " ("
-                     << ((Kind == ICSimpleFalse)
-                         ? BBI.FalseBB->getNumber()
-                         : BBI.TrueBB->getNumber()) << ") ");
+        DEBUG(dbgs() << "Ifcvt (Simple"
+                     << (Kind == ICSimpleFalse ? " false" : "")
+                     << "): " << printMBBReference(*BBI.BB) << " ("
+                     << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber()
+                                                 : BBI.TrueBB->getNumber())
+                     << ") ");
         RetVal = IfConvertSimple(BBI, Kind);
         DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
         if (RetVal) {
@@ -419,9 +435,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
           DEBUG(dbgs() << " false");
         if (isRev)
           DEBUG(dbgs() << " rev");
-        DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
-                     << BBI.TrueBB->getNumber() << ",F:"
-                     << BBI.FalseBB->getNumber() << ") ");
+        DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB)
+                     << " (T:" << BBI.TrueBB->getNumber()
+                     << ",F:" << BBI.FalseBB->getNumber() << ") ");
         RetVal = IfConvertTriangle(BBI, Kind);
         DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
         if (RetVal) {
@@ -435,24 +451,22 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
         }
         break;
       }
-      case ICDiamond: {
+      case ICDiamond:
         if (DisableDiamond) break;
-        DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
-                     << BBI.TrueBB->getNumber() << ",F:"
-                     << BBI.FalseBB->getNumber() << ") ");
+        DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB)
+                     << " (T:" << BBI.TrueBB->getNumber()
+                     << ",F:" << BBI.FalseBB->getNumber() << ") ");
         RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
                                   Token->TClobbersPred,
                                   Token->FClobbersPred);
         DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
         if (RetVal) ++NumDiamonds;
         break;
-      }
-      case ICForkedDiamond: {
+      case ICForkedDiamond:
         if (DisableForkedDiamond) break;
-        DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
-                     << BBI.BB->getNumber() << " (T:"
-                     << BBI.TrueBB->getNumber() << ",F:"
-                     << BBI.FalseBB->getNumber() << ") ");
+        DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB)
+                     << " (T:" << BBI.TrueBB->getNumber()
+                     << ",F:" << BBI.FalseBB->getNumber() << ") ");
         RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
                                       Token->TClobbersPred,
                                       Token->FClobbersPred);
@@ -460,7 +474,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
         if (RetVal) ++NumForkedDiamonds;
         break;
       }
-      }
+
+      if (RetVal && MRI->tracksLiveness())
+        recomputeLivenessFlags(*BBI.BB);
 
       Change |= RetVal;
 
@@ -616,7 +632,6 @@ bool IfConverter::CountDuplicatedInstructions(
     unsigned &Dups1, unsigned &Dups2,
     MachineBasicBlock &TBB, MachineBasicBlock &FBB,
     bool SkipUnconditionalBranches) const {
-
   while (TIB != TIE && FIB != FIE) {
     // Skip dbg_value instructions. These do not count.
     TIB = skipDebugInstructionsForward(TIB, TIE);
@@ -1342,19 +1357,10 @@ static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB,
   TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl);
 }
 
-/// Remove true / false edges if either / both are no longer successors.
-void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
-  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
-  SmallVector<MachineOperand, 4> Cond;
-  if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond))
-    BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
-}
-
 /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
 /// values defined in MI which are also live/used by MI.
 static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
-  const TargetRegisterInfo *TRI = MI.getParent()->getParent()
-    ->getSubtarget().getRegisterInfo();
+  const TargetRegisterInfo *TRI = MI.getMF()->getSubtarget().getRegisterInfo();
 
   // Before stepping forward past MI, remember which regs were live
   // before MI. This is needed to set the Undef flag only when reg is
@@ -1374,7 +1380,7 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
     unsigned Reg = Clobber.first;
     MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second);
     MachineInstr *OpMI = Op.getParent();
-    MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
+    MachineInstrBuilder MIB(*OpMI->getMF(), OpMI);
     if (Op.isRegMask()) {
       // First handle regmasks.  They clobber any entries in the mask which
       // means that we need a def for those registers.
@@ -1389,13 +1395,6 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
       MIB.addReg(Reg, RegState::Implicit | RegState::Define);
       continue;
     }
-    assert(Op.isReg() && "Register operand required");
-    if (Op.isDead()) {
-      // If we found a dead def, but it needs to be live, then remove the dead
-      // flag.
-      if (Redefs.contains(Op.getReg()))
-        Op.setIsDead(false);
-    }
     if (LiveBeforeMI.count(Reg))
       MIB.addReg(Reg, RegState::Implicit);
     else {
@@ -1412,26 +1411,6 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
   }
 }
 
-/// Remove kill flags from operands with a registers in the \p DontKill set.
-static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
-  for (MIBundleOperands O(MI); O.isValid(); ++O) {
-    if (!O->isReg() || !O->isKill())
-      continue;
-    if (DontKill.contains(O->getReg()))
-      O->setIsKill(false);
-  }
-}
-
-/// Walks a range of machine instructions and removes kill flags for registers
-/// in the \p DontKill set.
-static void RemoveKills(MachineBasicBlock::iterator I,
-                        MachineBasicBlock::iterator E,
-                        const LivePhysRegs &DontKill,
-                        const MCRegisterInfo &MCRI) {
-  for (MachineInstr &MI : make_range(I, E))
-    RemoveKills(MI, DontKill);
-}
-
 /// If convert a simple (split, no rejoin) sub-CFG.
 bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
   BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
@@ -1462,16 +1441,12 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
       llvm_unreachable("Unable to reverse branch condition!");
 
   Redefs.init(*TRI);
-  DontKill.init(*TRI);
 
   if (MRI->tracksLiveness()) {
     // Initialize liveins to the first BB. These are potentiall redefined by
     // predicated instructions.
     Redefs.addLiveIns(CvtMBB);
     Redefs.addLiveIns(NextMBB);
-    // Compute a set of registers which must not be killed by instructions in
-    // BB1: This is everything live-in to BB2.
-    DontKill.addLiveIns(NextMBB);
   }
 
   // Remove the branches from the entry so we can add the contents of the true
@@ -1483,15 +1458,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
     // the entry block.
     CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
 
-    // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
-    // explicitly remove CvtBBI as a successor.
+    // Keep the CFG updated.
     BBI.BB->removeSuccessor(&CvtMBB, true);
   } else {
     // Predicate the instructions in the true block.
-    RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
     PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
 
-    // Merge converted block into entry block.
+    // Merge converted block into entry block. The BB to Cvt edge is removed
+    // by MergeBlocks.
     MergeBlocks(BBI, *CvtBBI);
   }
 
@@ -1512,8 +1486,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
     IterIfcvt = false;
   }
 
-  RemoveExtraEdges(BBI);
-
   // Update block info. BB can be iteratively if-converted.
   if (!IterIfcvt)
     BBI.IsDone = true;
@@ -1578,8 +1550,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     Redefs.addLiveIns(NextMBB);
   }
 
-  DontKill.clear();
-
   bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
   BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
 
@@ -1599,10 +1569,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     // Copy instructions in the true block, predicate them, and add them to
     // the entry block.
     CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
-
-    // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
-    // explicitly remove CvtBBI as a successor.
-    BBI.BB->removeSuccessor(&CvtMBB, true);
   } else {
     // Predicate the 'true' block after removing its branch.
     CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
@@ -1612,6 +1578,9 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     MergeBlocks(BBI, *CvtBBI, false);
   }
 
+  // Keep the CFG updated.
+  BBI.BB->removeSuccessor(&CvtMBB, true);
+
   // If 'true' block has a 'false' successor, add an exit branch to it.
   if (HasEarlyExit) {
     SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
@@ -1659,8 +1628,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     IterIfcvt = false;
   }
 
-  RemoveExtraEdges(BBI);
-
   // Update block info. BB can be iteratively if-converted.
   if (!IterIfcvt)
     BBI.IsDone = true;
@@ -1765,14 +1732,7 @@ bool IfConverter::IfConvertDiamondCommon(
       --NumDups1;
   }
 
-  // Compute a set of registers which must not be killed by instructions in BB1:
-  // This is everything used+live in BB2 after the duplicated instructions. We
-  // can compute this set by simulating liveness backwards from the end of BB2.
-  DontKill.init(*TRI);
   if (MRI->tracksLiveness()) {
-    for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
-      DontKill.stepBackward(MI);
-
     for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
       SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
       Redefs.stepForward(MI, Dummy);
@@ -1802,10 +1762,6 @@ bool IfConverter::IfConvertDiamondCommon(
   }
   MBB1.erase(DI1, MBB1.end());
 
-  // Kill flags in the true block for registers living into the false block
-  // must be removed.
-  RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI);
-
   DI2 = BBI2->BB->end();
   // The branches have been checked to match. Skip over the branch in the false
   // block so that we don't try to predicate it.
@@ -1923,8 +1879,6 @@ bool IfConverter::IfConvertForkedDiamond(
   TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
                     TrueBBI.BrCond, dl);
 
-  RemoveExtraEdges(BBI);
-
   // Update block info.
   BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
   InvalidatePreds(*BBI.BB);
@@ -1961,6 +1915,11 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
   // fold the tail block in as well. Otherwise, unless it falls through to the
   // tail, add a unconditional branch to it.
   if (TailBB) {
+    // We need to remove the edges to the true and false blocks manually since
+    // we didn't let IfConvertDiamondCommon update the CFG.
+    BBI.BB->removeSuccessor(TrueBBI.BB);
+    BBI.BB->removeSuccessor(FalseBBI.BB, true);
+
     BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
     bool CanMergeTail = !TailBBI.HasFallThrough &&
       !TailBBI.BB->hasAddressTaken();
@@ -1990,13 +1949,6 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
     }
   }
 
-  // RemoveExtraEdges won't work if the block has an unanalyzable branch,
-  // which can happen here if TailBB is unanalyzable and is merged, so
-  // explicitly remove BBI1 and BBI2 as successors.
-  BBI.BB->removeSuccessor(TrueBBI.BB);
-  BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
-  RemoveExtraEdges(BBI);
-
   // Update block info.
   BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
   InvalidatePreds(*BBI.BB);
@@ -2101,10 +2053,6 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
     // If the predicated instruction now redefines a register as the result of
     // if-conversion, add an implicit kill.
     UpdatePredRedefs(*MI, Redefs);
-
-    // Some kill flags may not be correct anymore.
-    if (!DontKill.empty())
-      RemoveKills(*MI, DontKill);
   }
 
   if (!IgnoreBr) {
@@ -2133,7 +2081,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
 /// Move all instructions from FromBB to the end of ToBB.  This will leave
 /// FromBB as an empty block, so remove all of its successor edges except for
 /// the fall-through edge.  If AddEdges is true, i.e., when FromBBI's branch is
-/// being moved, add those successor edges to ToBBI.
+/// being moved, add those successor edges to ToBBI and remove the old edge
+/// from ToBBI to FromBBI.
 void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
   MachineBasicBlock &FromMBB = *FromBBI.BB;
   assert(!FromMBB.hasAddressTaken() &&
@@ -2165,12 +2114,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
   // AddEdges is true and FromMBB is a successor of ToBBI.BB.
   auto To2FromProb = BranchProbability::getZero();
   if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) {
+    // Remove the old edge but remember the edge probability so we can calculate
+    // the correct weights on the new edges being added further down.
     To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB);
-    // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the
-    // edge probability being merged to other edges when this edge is removed
-    // later.
-    ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB),
-                                 BranchProbability::getZero());
+    ToBBI.BB->removeSuccessor(&FromMBB);
   }
 
   for (MachineBasicBlock *Succ : FromSuccs) {
@@ -2229,9 +2176,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
     }
   }
 
-  // Now FromBBI always falls through to the next block!
-  if (NBB && !FromMBB.isSuccessor(NBB))
-    FromMBB.addSuccessor(NBB);
+  // Move the now empty FromMBB out of the way to the end of the function so
+  // it doesn't interfere with fallthrough checks done by canFallThroughTo().
+  MachineBasicBlock *Last = &*FromMBB.getParent()->rbegin();
+  if (Last != &FromMBB)
+    FromMBB.moveAfter(Last);
 
   // Normalize the probabilities of ToBBI.BB's successors with all adjustment
   // we've done above.
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index e308f49ec4e8..308b6d293d3d 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -1,4 +1,4 @@
-//===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===//
+//===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -26,38 +26,50 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/FaultMaps.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Instruction.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 static cl::opt<int> PageSize("imp-null-check-page-size",
                              cl::desc("The page size of the target in bytes"),
-                             cl::init(4096));
+                             cl::init(4096), cl::Hidden);
 
 static cl::opt<unsigned> MaxInstsToConsider(
     "imp-null-max-insts-to-consider",
     cl::desc("The max number of instructions to consider hoisting loads over "
              "(the algorithm is quadratic over this number)"),
-    cl::init(8));
+    cl::Hidden, cl::init(8));
 
 #define DEBUG_TYPE "implicit-null-checks"
 
@@ -152,7 +164,6 @@ class ImplicitNullChecks : public MachineFunctionPass {
   const TargetInstrInfo *TII = nullptr;
   const TargetRegisterInfo *TRI = nullptr;
   AliasAnalysis *AA = nullptr;
-  MachineModuleInfo *MMI = nullptr;
   MachineFrameInfo *MFI = nullptr;
 
   bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
@@ -166,6 +177,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
     AR_MayAlias,
     AR_WillAliasEverything
   };
+
   /// Returns AR_NoAlias if \p MI memory operation does not alias with
   /// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
   /// they may alias and any further memory operation may alias with \p PrevMI.
@@ -176,6 +188,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
     SR_Unsuitable,
     SR_Impossible
   };
+
   /// Return SR_Suitable if \p MI a memory operation that can be used to
   /// implicitly null check the value in \p PointerReg, SR_Unsuitable if
   /// \p MI cannot be used to null check and SR_Impossible if there is
@@ -200,6 +213,7 @@ public:
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AAResultsWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -211,7 +225,7 @@ public:
   }
 };
 
-}
+} // end anonymous namespace
 
 bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
   if (MI->isCall() || MI->hasUnmodeledSideEffects())
@@ -230,7 +244,7 @@ ImplicitNullChecks::DependenceResult
 ImplicitNullChecks::computeDependence(const MachineInstr *MI,
                                       ArrayRef<MachineInstr *> Block) {
   assert(llvm::all_of(Block, canHandle) && "Check this first!");
-  assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!");
+  assert(!is_contained(Block, MI) && "Block must be exclusive of MI!");
 
   Optional<ArrayRef<MachineInstr *>::iterator> Dep;
 
@@ -280,7 +294,6 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
 bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getSubtarget().getInstrInfo();
   TRI = MF.getRegInfo().getTargetRegisterInfo();
-  MMI = &MF.getMMI();
   MFI = &MF.getFrameInfo();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
@@ -356,7 +369,7 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
   // We want the mem access to be issued at a sane offset from PointerReg,
   // so that if PointerReg is null then the access reliably page faults.
   if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
-        Offset < PageSize))
+        -PageSize < Offset && Offset < PageSize))
     return SR_Unsuitable;
 
   // Finally, check whether the current memory access aliases with previous one.
@@ -390,8 +403,10 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
   // We don't want to reason about speculating loads.  Note -- at this point
   // we should have already filtered out all of the other non-speculatable
   // things, like calls and stores.
+  // We also do not want to hoist stores because it might change the memory
+  // while the FaultingMI may result in faulting.
   assert(canHandle(DependenceMI) && "Should never have reached here!");
-  if (DependenceMI->mayLoad())
+  if (DependenceMI->mayLoadOrStore())
     return false;
 
   for (auto &DependenceMO : DependenceMI->operands()) {
@@ -406,7 +421,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
     //    test %rcx, %rcx
     //    je _null_block
     //  _non_null_block:
-    //    %rdx<def> = INST
+    //    %rdx = INST
     //    ...
     //
     // This restriction does not apply to the faulting load inst because in
@@ -441,7 +456,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
 /// NullCheckList and return true, else return false.
 bool ImplicitNullChecks::analyzeBlockForNullChecks(
     MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
-  typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
 
   MDNode *BranchMD = nullptr;
   if (auto *BB = MBB.getBasicBlock())
@@ -483,7 +498,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
 
   // Starting with a code fragment like:
   //
-  //   test %RAX, %RAX
+  //   test %rax, %rax
   //   jne LblNotNull
   //
   //  LblNull:
@@ -493,13 +508,13 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
   //   Inst0
   //   Inst1
   //   ...
-  //   Def = Load (%RAX + <offset>)
+  //   Def = Load (%rax + <offset>)
   //   ...
   //
   //
   // we want to end up with
   //
-  //   Def = FaultingLoad (%RAX + <offset>), LblNull
+  //   Def = FaultingLoad (%rax + <offset>), LblNull
   //   jmp LblNotNull ;; explicit or fallthrough
   //
   //  LblNotNull:
@@ -513,11 +528,11 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
   //
   // To see why this is legal, consider the two possibilities:
   //
-  //  1. %RAX is null: since we constrain <offset> to be less than PageSize, the
+  //  1. %rax is null: since we constrain <offset> to be less than PageSize, the
   //     load instruction dereferences the null page, causing a segmentation
   //     fault.
   //
-  //  2. %RAX is not null: in this case we know that the load cannot fault, as
+  //  2. %rax is not null: in this case we know that the load cannot fault, as
   //     otherwise the load would've faulted in the original program too and the
   //     original program would've been undefined.
   //
@@ -555,7 +570,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
     }
 
     // If MI re-defines the PointerReg then we cannot move further.
-    if (any_of(MI.operands(), [&](MachineOperand &MO) {
+    if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) {
           return MO.isReg() && MO.getReg() && MO.isDef() &&
                  TRI->regsOverlap(MO.getReg(), PointerReg);
         }))
@@ -674,9 +689,10 @@ void ImplicitNullChecks::rewriteNullChecks(
   }
 }
 
-
 char ImplicitNullChecks::ID = 0;
+
 char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
+
 INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE,
                       "Implicit null checks", false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index eda4f74c7874..1aaf7a0ceef8 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -1,4 +1,4 @@
-//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,31 +12,52 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "LiveRangeCalc.h"
 #include "Spiller.h"
 #include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -56,6 +77,7 @@ static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
                                      cl::desc("Disable inline spill hoisting"));
 
 namespace {
+
 class HoistSpillHelper : private LiveRangeEdit::Delegate {
   MachineFunction &MF;
   LiveIntervals &LIS;
@@ -64,7 +86,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
   MachineDominatorTree &MDT;
   MachineLoopInfo &Loops;
   VirtRegMap &VRM;
-  MachineFrameInfo &MFI;
   MachineRegisterInfo &MRI;
   const TargetInstrInfo &TII;
   const TargetRegisterInfo &TRI;
@@ -72,13 +93,17 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
 
   InsertPointAnalysis IPA;
 
-  // Map from StackSlot to its original register.
-  DenseMap<int, unsigned> StackSlotToReg;
+  // Map from StackSlot to the LiveInterval of the original register.
+  // Note the LiveInterval of the original register may have been deleted
+  // after it is spilled. We keep a copy here to track the range where
+  // spills can be moved.
+  DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI;
+
   // Map from pair of (StackSlot and Original VNI) to a set of spills which
   // have the same stackslot and have equal values defined by Original VNI.
   // These spills are mergeable and are hoist candiates.
-  typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
-      MergeableSpillsMap;
+  using MergeableSpillsMap =
+      MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>;
   MergeableSpillsMap MergeableSpills;
 
   /// This is the map from original register to a set containing all its
@@ -86,8 +111,8 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
   /// sibling there and use it as the source of the new spill.
   DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
 
-  bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
-                     unsigned &LiveReg);
+  bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
+                     MachineBasicBlock &BB, unsigned &LiveReg);
 
   void rmRedundantSpills(
       SmallPtrSet<MachineInstr *, 16> &Spills,
@@ -101,7 +126,7 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
       DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
       DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
 
-  void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
+  void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI,
                       SmallPtrSet<MachineInstr *, 16> &Spills,
                       SmallVectorImpl<MachineInstr *> &SpillsToRm,
                       DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
@@ -114,8 +139,7 @@ public:
         AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
         MDT(pass.getAnalysis<MachineDominatorTree>()),
         Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
-        MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
-        TII(*mf.getSubtarget().getInstrInfo()),
+        MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
         TRI(*mf.getSubtarget().getRegisterInfo()),
         MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
         IPA(LIS, mf.getNumBlockIDs()) {}
@@ -135,7 +159,6 @@ class InlineSpiller : public Spiller {
   MachineDominatorTree &MDT;
   MachineLoopInfo &Loops;
   VirtRegMap &VRM;
-  MachineFrameInfo &MFI;
   MachineRegisterInfo &MRI;
   const TargetInstrInfo &TII;
   const TargetRegisterInfo &TRI;
@@ -163,7 +186,7 @@ class InlineSpiller : public Spiller {
   // Object records spills information and does the hoisting.
   HoistSpillHelper HSpiller;
 
-  ~InlineSpiller() override {}
+  ~InlineSpiller() override = default;
 
 public:
   InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
@@ -172,8 +195,7 @@ public:
         AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
         MDT(pass.getAnalysis<MachineDominatorTree>()),
         Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
-        MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
-        TII(*mf.getSubtarget().getInstrInfo()),
+        MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
         TRI(*mf.getSubtarget().getRegisterInfo()),
         MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
         HSpiller(pass, mf, vrm) {}
@@ -196,7 +218,7 @@ private:
   void reMaterializeAll();
 
   bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
-  bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
+  bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
                          MachineInstr *LoadMI = nullptr);
   void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
   void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
@@ -204,19 +226,17 @@ private:
   void spillAroundUses(unsigned Reg);
   void spillAll();
 };
-}
 
-namespace llvm {
+} // end anonymous namespace
 
-Spiller::~Spiller() { }
-void Spiller::anchor() { }
+Spiller::~Spiller() = default;
 
-Spiller *createInlineSpiller(MachineFunctionPass &pass,
-                             MachineFunction &mf,
-                             VirtRegMap &vrm) {
-  return new InlineSpiller(pass, mf, vrm);
-}
+void Spiller::anchor() {}
 
+Spiller *llvm::createInlineSpiller(MachineFunctionPass &pass,
+                                   MachineFunction &mf,
+                                   VirtRegMap &vrm) {
+  return new InlineSpiller(pass, mf, vrm);
 }
 
 //===----------------------------------------------------------------------===//
@@ -340,7 +360,7 @@ bool InlineSpiller::isSibling(unsigned Reg) {
 ///
 ///   x = def
 ///   spill x
-///   y = use x<kill>
+///   y = use killed x
 ///
 /// This hoist only helps when the copy kills its source.
 ///
@@ -457,7 +477,6 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
   } while (!WorkList.empty());
 }
 
-
 //===----------------------------------------------------------------------===//
 //                            Rematerialization
 //===----------------------------------------------------------------------===//
@@ -496,7 +515,6 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
 
 /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
 bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
-
   // Analyze instruction
   SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
   MIBundleOperands::VirtRegInfo RI =
@@ -654,7 +672,6 @@ void InlineSpiller::reMaterializeAll() {
   DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
 }
 
-
 //===----------------------------------------------------------------------===//
 //                                 Spilling
 //===----------------------------------------------------------------------===//
@@ -731,7 +748,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
 /// @param LoadMI Load instruction to use instead of stack slot when non-null.
 /// @return       True on success.
 bool InlineSpiller::
-foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
                   MachineInstr *LoadMI) {
   if (Ops.empty())
     return false;
@@ -904,7 +921,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
 
 /// spillAroundUses - insert spill code around each use of Reg.
 void InlineSpiller::spillAroundUses(unsigned Reg) {
-  DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+  DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
   LiveInterval &OldLI = LIS.getInterval(Reg);
 
   // Iterate over instructions using Reg.
@@ -1060,7 +1077,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
   DEBUG(dbgs() << "Inline spilling "
                << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
                << ':' << edit.getParent()
-               << "\nFrom original " << PrintReg(Original) << '\n');
+               << "\nFrom original " << printReg(Original) << '\n');
   assert(edit.getParent().isSpillable() &&
          "Attempting to spill already spilled value.");
   assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
@@ -1076,49 +1093,52 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
 }
 
 /// Optimizations after all the reg selections and spills are done.
-///
 void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
 
 /// When a spill is inserted, add the spill to MergeableSpills map.
-///
 void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
                                             unsigned Original) {
-  StackSlotToReg[StackSlot] = Original;
+  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+  LiveInterval &OrigLI = LIS.getInterval(Original);
+  // save a copy of LiveInterval in StackSlotToOrigLI because the original
+  // LiveInterval may be cleared after all its references are spilled.
+  if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
+    auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
+    LI->assign(OrigLI, Allocator);
+    StackSlotToOrigLI[StackSlot] = std::move(LI);
+  }
   SlotIndex Idx = LIS.getInstructionIndex(Spill);
-  VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+  VNInfo *OrigVNI = StackSlotToOrigLI[StackSlot]->getVNInfoAt(Idx.getRegSlot());
   std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
   MergeableSpills[MIdx].insert(&Spill);
 }
 
 /// When a spill is removed, remove the spill from MergeableSpills map.
 /// Return true if the spill is removed successfully.
-///
 bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
                                              int StackSlot) {
-  int Original = StackSlotToReg[StackSlot];
-  if (!Original)
+  auto It = StackSlotToOrigLI.find(StackSlot);
+  if (It == StackSlotToOrigLI.end())
     return false;
   SlotIndex Idx = LIS.getInstructionIndex(Spill);
-  VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+  VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot());
   std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
   return MergeableSpills[MIdx].erase(&Spill);
 }
 
 /// Check BB to see if it is a possible target BB to place a hoisted spill,
 /// i.e., there should be a living sibling of OrigReg at the insert point.
-///
-bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
+bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
                                      MachineBasicBlock &BB, unsigned &LiveReg) {
   SlotIndex Idx;
-  LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+  unsigned OrigReg = OrigLI.reg;
   MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
   if (MI != BB.end())
     Idx = LIS.getInstructionIndex(*MI);
   else
     Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
   SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
-  assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
-         "Unexpected VNI");
+  assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");
 
   for (auto const SibReg : Siblings) {
     LiveInterval &LI = LIS.getInterval(SibReg);
@@ -1133,7 +1153,6 @@ bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
 
 /// Remove redundant spills in the same BB. Save those redundant spills in
 /// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
-///
 void HoistSpillHelper::rmRedundantSpills(
     SmallPtrSet<MachineInstr *, 16> &Spills,
     SmallVectorImpl<MachineInstr *> &SpillsToRm,
@@ -1166,7 +1185,6 @@ void HoistSpillHelper::rmRedundantSpills(
 /// time. \p SpillBBToSpill will be populated as part of the process and
 /// maps a basic block to the first store occurring in the basic block.
 /// \post SpillsToRm.union(Spills\@post) == Spills\@pre
-///
 void HoistSpillHelper::getVisitOrders(
     MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
     SmallVectorImpl<MachineDomTreeNode *> &Orders,
@@ -1254,9 +1272,9 @@ void HoistSpillHelper::getVisitOrders(
 /// Try to hoist spills according to BB hotness. The spills to removed will
 /// be saved in \p SpillsToRm. The spills to be inserted will be saved in
 /// \p SpillsToIns.
-///
 void HoistSpillHelper::runHoistSpills(
-    unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
+    LiveInterval &OrigLI, VNInfo &OrigVNI,
+    SmallPtrSet<MachineInstr *, 16> &Spills,
     SmallVectorImpl<MachineInstr *> &SpillsToRm,
     DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
   // Visit order of dominator tree nodes.
@@ -1280,9 +1298,10 @@ void HoistSpillHelper::runHoistSpills(
   // nodes set and the cost of all the spills inside those nodes.
   // The nodes set are the locations where spills are to be inserted
   // in the subtree of current node.
-  typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
-      NodesCostPair;
+  using NodesCostPair =
+      std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>;
   DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+
   // Iterate Orders set in reverse order, which will be a bottom-up order
   // in the dominator tree. Once we visit a dom tree node, we know its
   // children have already been visited and the spill locations in the
@@ -1331,7 +1350,7 @@ void HoistSpillHelper::runHoistSpills(
 
     // Check whether Block is a possible candidate to insert spill.
     unsigned LiveReg = 0;
-    if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
+    if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
       continue;
 
     // If there are multiple spills that could be merged, bias a little
@@ -1391,18 +1410,12 @@ void HoistSpillHelper::runHoistSpills(
 /// bottom-up order, and for each node, we will decide whether to hoist spills
 /// inside its subtree to that node. In this way, we can get benefit locally
 /// even if hoisting all the equal spills to one cold place is impossible.
-///
 void HoistSpillHelper::hoistAllSpills() {
   SmallVector<unsigned, 4> NewVRegs;
   LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
 
-  // Save the mapping between stackslot and its original reg.
-  DenseMap<int, unsigned> SlotToOrigReg;
   for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
-    int Slot = VRM.getStackSlot(Reg);
-    if (Slot != VirtRegMap::NO_STACK_SLOT)
-      SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
     unsigned Original = VRM.getPreSplitReg(Reg);
     if (!MRI.def_empty(Reg))
       Virt2SiblingsMap[Original].insert(Reg);
@@ -1411,8 +1424,7 @@ void HoistSpillHelper::hoistAllSpills() {
   // Each entry in MergeableSpills contains a spill set with equal values.
   for (auto &Ent : MergeableSpills) {
     int Slot = Ent.first.first;
-    unsigned OrigReg = SlotToOrigReg[Slot];
-    LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+    LiveInterval &OrigLI = *StackSlotToOrigLI[Slot];
     VNInfo *OrigVNI = Ent.first.second;
     SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
     if (Ent.second.empty())
@@ -1431,7 +1443,7 @@ void HoistSpillHelper::hoistAllSpills() {
     // SpillsToIns is the spill set to be newly inserted after hoisting.
     DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
 
-    runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+    runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
 
     DEBUG({
       dbgs() << "Finally inserted spills in BB: ";
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index f8cc24724580..72227cc7bba9 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
+//===- InterferenceCache.cpp - Caching per-block interference -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,9 +12,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "InterferenceCache.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <tuple>
 
 using namespace llvm;
 
@@ -149,7 +161,7 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
   BlockInterference *BI = &Blocks[MBBNum];
   ArrayRef<SlotIndex> RegMaskSlots;
   ArrayRef<const uint32_t*> RegMaskBits;
-  for (;;) {
+  while (true) {
     BI->Tag = Tag;
     BI->First = BI->Last = SlotIndex();
 
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 18aa5c7c5ad6..160e2b16e294 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,47 +15,53 @@
 #ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
 #define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
 
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
 
 namespace llvm {
 
 class LiveIntervals;
+class MachineFunction;
+class TargetRegisterInfo;
 
 class LLVM_LIBRARY_VISIBILITY InterferenceCache {
-  const TargetRegisterInfo *TRI;
-  LiveIntervalUnion *LIUArray;
-  MachineFunction *MF;
-
   /// BlockInterference - information about the interference in a single basic
   /// block.
   struct BlockInterference {
-    BlockInterference() : Tag(0) {}
-    unsigned Tag;
+    unsigned Tag = 0;
     SlotIndex First;
     SlotIndex Last;
+
+    BlockInterference() {}
   };
 
   /// Entry - A cache entry containing interference information for all aliases
   /// of PhysReg in all basic blocks.
   class Entry {
     /// PhysReg - The register currently represented.
-    unsigned PhysReg;
+    unsigned PhysReg = 0;
 
     /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
     /// change.
-    unsigned Tag;
+    unsigned Tag = 0;
 
     /// RefCount - The total number of Cursor instances referring to this Entry.
-    unsigned RefCount;
+    unsigned RefCount = 0;
 
     /// MF - The current function.
     MachineFunction *MF;
 
     /// Indexes - Mapping block numbers to SlotIndex ranges.
-    SlotIndexes *Indexes;
+    SlotIndexes *Indexes = nullptr;
 
     /// LIS - Used for accessing register mask interference maps.
-    LiveIntervals *LIS;
+    LiveIntervals *LIS = nullptr;
 
     /// PrevPos - The previous position the iterators were moved to.
     SlotIndex PrevPos;
@@ -72,13 +78,12 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
       unsigned VirtTag;
 
       /// Fixed interference in RegUnit.
-      LiveRange *Fixed;
+      LiveRange *Fixed = nullptr;
 
       /// Iterator pointing into the fixed RegUnit interference.
       LiveInterval::iterator FixedI;
 
-      RegUnitInfo(LiveIntervalUnion &LIU)
-          : VirtTag(LIU.getTag()), Fixed(nullptr) {
+      RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()) {
         VirtI.setMap(LIU.getMap());
       }
     };
@@ -94,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
     void update(unsigned MBBNum);
 
   public:
-    Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {}
+    Entry() = default;
 
     void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
       assert(!hasRefs() && "Cannot clear cache entry with references");
@@ -134,13 +139,17 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
   // robin manner.
   enum { CacheEntries = 32 };
 
+  const TargetRegisterInfo *TRI = nullptr;
+  LiveIntervalUnion *LIUArray = nullptr;
+  MachineFunction *MF = nullptr;
+
   // Point to an entry for each physreg. The entry pointed to may not be up to
   // date, and it may have been reused for a different physreg.
-  unsigned char* PhysRegEntries;
-  size_t PhysRegEntriesCount;
+  unsigned char* PhysRegEntries = nullptr;
+  size_t PhysRegEntriesCount = 0;
 
   // Next round-robin entry to be picked.
-  unsigned RoundRobin;
+  unsigned RoundRobin = 0;
 
   // The actual cache entries.
   Entry Entries[CacheEntries];
@@ -149,9 +158,9 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
   Entry *get(unsigned PhysReg);
 
 public:
-  InterferenceCache()
-    : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr),
-      PhysRegEntriesCount(0), RoundRobin(0) {}
+  friend class Cursor;
+
+  InterferenceCache() = default;
 
   ~InterferenceCache() {
     free(PhysRegEntries);
@@ -160,8 +169,9 @@ public:
   void reinitPhysRegEntries();
 
   /// init - Prepare cache for a new function.
-  void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
-            const TargetRegisterInfo *);
+  void init(MachineFunction *mf, LiveIntervalUnion *liuarray,
+            SlotIndexes *indexes, LiveIntervals *lis,
+            const TargetRegisterInfo *tri);
 
   /// getMaxCursors - Return the maximum number of concurrent cursors that can
   /// be supported.
@@ -169,8 +179,8 @@ public:
 
   /// Cursor - The primary query interface for the block interference cache.
   class Cursor {
-    Entry *CacheEntry;
-    const BlockInterference *Current;
+    Entry *CacheEntry = nullptr;
+    const BlockInterference *Current = nullptr;
     static const BlockInterference NoInterference;
 
     void setEntry(Entry *E) {
@@ -186,10 +196,9 @@ public:
 
   public:
     /// Cursor - Create a dangling cursor.
-    Cursor() : CacheEntry(nullptr), Current(nullptr) {}
-    ~Cursor() { setEntry(nullptr); }
+    Cursor() = default;
 
-    Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) {
+    Cursor(const Cursor &O) {
       setEntry(O.CacheEntry);
     }
 
@@ -198,6 +207,8 @@ public:
       return *this;
     }
 
+    ~Cursor() { setEntry(nullptr); }
+
     /// setPhysReg - Point this cursor to PhysReg's interference.
     void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
       // Release reference before getting a new one. That guarantees we can
@@ -229,10 +240,8 @@ public:
       return Current->Last;
     }
   };
-
-  friend class Cursor;
 };
 
-} // namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index ee4929c91482..9c906d309639 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,4 +1,4 @@
-//===--------------------- InterleavedAccessPass.cpp ----------------------===//
+//===- InterleavedAccessPass.cpp ------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -42,17 +42,32 @@
 //
 // Similarly, a set of interleaved stores can be transformed into an optimized
 // sequence of shuffles followed by a set of target specific stores for X86.
+//
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <utility>
 
 using namespace llvm;
 
@@ -66,10 +81,10 @@ static cl::opt<bool> LowerInterleavedAccesses(
 namespace {
 
 class InterleavedAccess : public FunctionPass {
-
 public:
   static char ID;
-  InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) {
+
+  InterleavedAccess() : FunctionPass(ID) {
     initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
   }
 
@@ -83,8 +98,8 @@ public:
   }
 
 private:
-  DominatorTree *DT;
-  const TargetLowering *TLI;
+  DominatorTree *DT = nullptr;
+  const TargetLowering *TLI = nullptr;
 
   /// The maximum supported interleave factor.
   unsigned MaxFactor;
@@ -104,9 +119,11 @@ private:
   bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
                           ArrayRef<ShuffleVectorInst *> Shuffles);
 };
+
 } // end anonymous namespace.
 
 char InterleavedAccess::ID = 0;
+
 INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
     "Lower interleaved memory accesses to target specific intrinsics", false,
     false)
@@ -331,7 +348,6 @@ bool InterleavedAccess::lowerInterleavedLoad(
 bool InterleavedAccess::tryReplaceExtracts(
     ArrayRef<ExtractElementInst *> Extracts,
     ArrayRef<ShuffleVectorInst *> Shuffles) {
-
   // If there aren't any extractelement instructions to modify, there's nothing
   // to do.
   if (Extracts.empty())
@@ -342,7 +358,6 @@ bool InterleavedAccess::tryReplaceExtracts(
   DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
 
   for (auto *Extract : Extracts) {
-
     // The vector index that is extracted.
     auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
     auto Index = IndexOperand->getSExtValue();
@@ -351,7 +366,6 @@ bool InterleavedAccess::tryReplaceExtracts(
     // extractelement instruction (which uses an interleaved load) to use one
     // of the shufflevector instructions instead of the load.
     for (auto *Shuffle : Shuffles) {
-
       // If the shufflevector instruction doesn't dominate the extract, we
       // can't create a use of it.
       if (!DT->dominates(Shuffle, Extract))
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index c6cc909e25d3..12777d5ed110 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -57,10 +57,10 @@ static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
   }
 }
 
-/// ReplaceCallWith - This function is used when we want to lower an intrinsic
-/// call to a call of an external function.  This handles hard cases such as
-/// when there was already a prototype for the external function, and if that
-/// prototype doesn't match the arguments we expect to pass in.
+/// This function is used when we want to lower an intrinsic call to a call of
+/// an external function. This handles hard cases such as when there was already
+/// a prototype for the external function, but that prototype doesn't match the
+/// arguments we expect to pass in.
 template <class ArgIt>
 static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
                                  ArgIt ArgBegin, ArgIt ArgEnd,
@@ -161,12 +161,11 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
       }
 }
 
-/// LowerBSWAP - Emit the code to lower bswap of V before the specified
-/// instruction IP.
+/// Emit the code to lower bswap of V before the specified instruction IP.
 static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
-  assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+  assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
 
-  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+  unsigned BitSize = V->getType()->getScalarSizeInBits();
 
   IRBuilder<> Builder(IP);
 
@@ -190,10 +189,10 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
     Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
                                      "bswap.1");
     Tmp3 = Builder.CreateAnd(Tmp3,
-                         ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+                         ConstantInt::get(V->getType(), 0xFF0000),
                              "bswap.and3");
     Tmp2 = Builder.CreateAnd(Tmp2,
-                           ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+                           ConstantInt::get(V->getType(), 0xFF00),
                              "bswap.and2");
     Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
     Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
@@ -221,27 +220,27 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
                                      ConstantInt::get(V->getType(), 56),
                                      "bswap.1");
     Tmp7 = Builder.CreateAnd(Tmp7,
-                             ConstantInt::get(Type::getInt64Ty(Context),
+                             ConstantInt::get(V->getType(),
                                               0xFF000000000000ULL),
                              "bswap.and7");
     Tmp6 = Builder.CreateAnd(Tmp6,
-                             ConstantInt::get(Type::getInt64Ty(Context),
+                             ConstantInt::get(V->getType(),
                                               0xFF0000000000ULL),
                              "bswap.and6");
     Tmp5 = Builder.CreateAnd(Tmp5,
-                        ConstantInt::get(Type::getInt64Ty(Context),
+                        ConstantInt::get(V->getType(),
                              0xFF00000000ULL),
                              "bswap.and5");
     Tmp4 = Builder.CreateAnd(Tmp4,
-                        ConstantInt::get(Type::getInt64Ty(Context),
+                        ConstantInt::get(V->getType(),
                              0xFF000000ULL),
                              "bswap.and4");
     Tmp3 = Builder.CreateAnd(Tmp3,
-                             ConstantInt::get(Type::getInt64Ty(Context),
+                             ConstantInt::get(V->getType(),
                              0xFF0000ULL),
                              "bswap.and3");
     Tmp2 = Builder.CreateAnd(Tmp2,
-                             ConstantInt::get(Type::getInt64Ty(Context),
+                             ConstantInt::get(V->getType(),
                              0xFF00ULL),
                              "bswap.and2");
     Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
@@ -257,8 +256,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
   return V;
 }
 
-/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
-/// instruction IP.
+/// Emit the code to lower ctpop of V before the specified instruction IP.
 static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
   assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
 
@@ -297,8 +295,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
   return Count;
 }
 
-/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
-/// instruction IP.
+/// Emit the code to lower ctlz of V before the specified instruction IP.
 static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
 
   IRBuilder<> Builder(IP);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index f2defb4fd623..92edfb059ad6 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -16,11 +16,12 @@
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
@@ -29,10 +30,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 void LLVMTargetMachine::initAsmInfo() {
@@ -77,7 +76,6 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
                                      Reloc::Model RM, CodeModel::Model CM,
                                      CodeGenOpt::Level OL)
     : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
-  T.adjustCodeGenOpts(TT, RM, CM);
   this->RM = RM;
   this->CMModel = CM;
   this->OptLevel = OL;
@@ -92,24 +90,25 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
 /// addPassesToX helper drives creation and initialization of TargetPassConfig.
 static MCContext *
 addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
-                        bool DisableVerify, AnalysisID StartBefore,
-                        AnalysisID StartAfter, AnalysisID StopBefore,
-                        AnalysisID StopAfter) {
+                        bool DisableVerify, bool &WillCompleteCodeGenPipeline,
+                        raw_pwrite_stream &Out, MachineModuleInfo *MMI) {
   // Targets may override createPassConfig to provide a target-specific
   // subclass.
   TargetPassConfig *PassConfig = TM->createPassConfig(PM);
-  PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore,
-                                 StopAfter);
   // Set PassConfig options provided by TargetMachine.
   PassConfig->setDisableVerify(DisableVerify);
+  WillCompleteCodeGenPipeline = PassConfig->willCompleteCodeGenPipeline();
   PM.add(PassConfig);
-  MachineModuleInfo *MMI = new MachineModuleInfo(TM);
+  if (!MMI)
+    MMI = new MachineModuleInfo(TM);
   PM.add(MMI);
 
   if (PassConfig->addISelPasses())
     return nullptr;
   PassConfig->addMachinePasses();
   PassConfig->setInitialized();
+  if (!WillCompleteCodeGenPipeline)
+    PM.add(createPrintMIRPass(Out));
 
   return &MMI->getContext();
 }
@@ -163,7 +162,8 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
 
     Triple T(getTargetTriple().str());
     AsmStreamer.reset(getTarget().createMCObjectStreamer(
-        T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+        T, Context, std::unique_ptr<MCAsmBackend>(MAB), Out,
+        std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
         Options.MCOptions.MCIncrementalLinkerCompatible,
         /*DWARFMustBeAtTheEnd*/ true));
     break;
@@ -185,23 +185,20 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
   return false;
 }
 
-bool LLVMTargetMachine::addPassesToEmitFile(
-    PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
-    bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
-    AnalysisID StopBefore, AnalysisID StopAfter) {
+bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+                                            raw_pwrite_stream &Out,
+                                            CodeGenFileType FileType,
+                                            bool DisableVerify,
+                                            MachineModuleInfo *MMI) {
   // Add common CodeGen passes.
-  MCContext *Context =
-      addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
-                              StopBefore, StopAfter);
+  bool WillCompleteCodeGenPipeline = true;
+  MCContext *Context = addPassesToGenerateCode(
+      this, PM, DisableVerify, WillCompleteCodeGenPipeline, Out, MMI);
   if (!Context)
     return true;
 
-  if (StopBefore || StopAfter) {
-    PM.add(createPrintMIRPass(Out));
-  } else {
-    if (addAsmPrinter(PM, Out, FileType, *Context))
-      return true;
-  }
+  if (WillCompleteCodeGenPipeline && addAsmPrinter(PM, Out, FileType, *Context))
+    return true;
 
   PM.add(createFreeMachineFunctionPass());
   return false;
@@ -216,10 +213,13 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
                                           raw_pwrite_stream &Out,
                                           bool DisableVerify) {
   // Add common CodeGen passes.
-  Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr,
-                                nullptr, nullptr);
+  bool WillCompleteCodeGenPipeline = true;
+  Ctx = addPassesToGenerateCode(this, PM, DisableVerify,
+                                WillCompleteCodeGenPipeline, Out,
+                                /*MachineModuleInfo*/ nullptr);
   if (!Ctx)
     return true;
+  assert(WillCompleteCodeGenPipeline && "CodeGen pipeline has been altered");
 
   if (Options.MCOptions.MCSaveTempLabels)
     Ctx->setAllowTemporaryLabels(false);
@@ -238,7 +238,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
   const Triple &T = getTargetTriple();
   const MCSubtargetInfo &STI = *getMCSubtargetInfo();
   std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
-      T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+      T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out,
+      std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
       Options.MCOptions.MCIncrementalLinkerCompatible,
       /*DWARFMustBeAtTheEnd*/ true));
 
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 86ef898932a7..8ffd51a550fc 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -134,6 +134,7 @@ SUnit *LatencyPriorityQueue::pop() {
 void LatencyPriorityQueue::remove(SUnit *SU) {
   assert(!Queue.empty() && "Queue is empty!");
   std::vector<SUnit *>::iterator I = find(Queue, SU);
+  assert(I != Queue.end() && "Queue doesn't contain the SU being removed!");
   if (I != std::prev(Queue.end()))
     std::swap(*I, Queue.back());
   Queue.pop_back();
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 995c58a63564..8c54751ee833 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -47,11 +47,11 @@ void LexicalScopes::reset() {
 
 /// initialize - Scan machine function and constuct lexical scope nest.
 void LexicalScopes::initialize(const MachineFunction &Fn) {
+  reset();
   // Don't attempt any lexical scope creation for a NoDebug compile unit.
-  if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() ==
+  if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
       DICompileUnit::NoDebug)
     return;
-  reset();
   MF = &Fn;
   SmallVector<InsnRange, 4> MIRanges;
   DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
@@ -173,7 +173,7 @@ LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
                                                     false)).first;
 
   if (!Parent) {
-    assert(cast<DISubprogram>(Scope)->describes(MF->getFunction()));
+    assert(cast<DISubprogram>(Scope)->describes(&MF->getFunction()));
     assert(!CurrentFnLexicalScope);
     CurrentFnLexicalScope = &I->second;
   }
@@ -277,7 +277,9 @@ void LexicalScopes::assignInstructionRanges(
 /// DebugLoc.
 void LexicalScopes::getMachineBasicBlocks(
     const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
+  assert(MF && "Method called on a uninitialized LexicalScopes object!");
   MBBs.clear();
+
   LexicalScope *Scope = getOrCreateLexicalScope(DL);
   if (!Scope)
     return;
@@ -296,6 +298,7 @@ void LexicalScopes::getMachineBasicBlocks(
 /// dominates - Return true if DebugLoc's lexical scope dominates at least one
 /// machine instruction's lexical scope in a given machine basic block.
 bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
+  assert(MF && "Unexpected uninitialized LexicalScopes object!");
   LexicalScope *Scope = getOrCreateLexicalScope(DL);
   if (!Scope)
     return false;
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index b5e705f6455d..19ec281079cb 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -1,4 +1,4 @@
-//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===//
+//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,28 +18,45 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/UniqueVector.h"
 #include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <list>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
 #include <queue>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -47,8 +64,6 @@ using namespace llvm;
 
 STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
 
-namespace {
-
 // \brief If @MI is a DBG_VALUE with debug value described by a defined
 // register, returns the number of this register. In the other case, returns 0.
 static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
@@ -59,8 +74,9 @@ static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
   return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
 }
 
-class LiveDebugValues : public MachineFunctionPass {
+namespace {
 
+class LiveDebugValues : public MachineFunctionPass {
 private:
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
@@ -87,15 +103,15 @@ private:
   };
 
   /// Based on std::pair so it can be used as an index into a DenseMap.
-  typedef std::pair<const DILocalVariable *, const DILocation *>
-      DebugVariableBase;
+  using DebugVariableBase =
+      std::pair<const DILocalVariable *, const DILocation *>;
   /// A potentially inlined instance of a variable.
   struct DebugVariable : public DebugVariableBase {
     DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
         : DebugVariableBase(Var, InlinedAt) {}
 
-    const DILocalVariable *getVar() const { return this->first; };
-    const DILocation *getInlinedAt() const { return this->second; };
+    const DILocalVariable *getVar() const { return this->first; }
+    const DILocation *getInlinedAt() const { return this->second; }
 
     bool operator<(const DebugVariable &DV) const {
       if (getVar() == DV.getVar())
@@ -109,38 +125,25 @@ private:
     const DebugVariable Var;
     const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
     mutable UserValueScopes UVS;
-    enum { InvalidKind = 0, RegisterKind } Kind;
+    enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind;
 
     /// The value location. Stored separately to avoid repeatedly
     /// extracting it from MI.
     union {
-      struct {
-        uint32_t RegNo;
-        uint32_t Offset;
-      } RegisterLoc;
+      uint64_t RegNo;
       uint64_t Hash;
     } Loc;
 
     VarLoc(const MachineInstr &MI, LexicalScopes &LS)
         : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
-          UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) {
+          UVS(MI.getDebugLoc(), LS) {
       static_assert((sizeof(Loc) == sizeof(uint64_t)),
                     "hash does not cover all members of Loc");
       assert(MI.isDebugValue() && "not a DBG_VALUE");
       assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
       if (int RegNo = isDbgValueDescribedByReg(MI)) {
         Kind = RegisterKind;
-        Loc.RegisterLoc.RegNo = RegNo;
-        int64_t Offset =
-            MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
-        // We don't support offsets larger than 4GiB here. They are
-        // slated to be replaced with DIExpressions anyway.
-        // With indirect debug values used for spill locations, Offset 
-        // can be negative.
-        if (Offset == INT64_MIN || std::abs(Offset) >= (1LL << 32))
-          Kind = InvalidKind;
-        else
-          Loc.RegisterLoc.Offset = Offset;
+        Loc.RegNo = RegNo;
       }
     }
 
@@ -148,7 +151,7 @@ private:
     /// otherwise return 0.
     unsigned isDescribedByReg() const {
       if (Kind == RegisterKind)
-        return Loc.RegisterLoc.RegNo;
+        return Loc.RegNo;
       return 0;
     }
 
@@ -172,14 +175,14 @@ private:
     }
   };
 
-  typedef UniqueVector<VarLoc> VarLocMap;
-  typedef SparseBitVector<> VarLocSet;
-  typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+  using VarLocMap = UniqueVector<VarLoc>;
+  using VarLocSet = SparseBitVector<>;
+  using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
   struct SpillDebugPair {
     MachineInstr *SpillInst;
     MachineInstr *DebugInst;
   };
-  typedef SmallVector<SpillDebugPair, 4> SpillMap;
+  using SpillMap = SmallVector<SpillDebugPair, 4>;
 
   /// This holds the working set of currently open ranges. For fast
   /// access, this is done both as a set of VarLocIDs, and a map of
@@ -275,14 +278,16 @@ public:
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
-} // namespace
+} // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
 //            Implementation
 //===----------------------------------------------------------------------===//
 
 char LiveDebugValues::ID = 0;
+
 char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+
 INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis",
                 false, false)
 
@@ -368,7 +373,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
 void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
                                           OpenRangesSet &OpenRanges,
                                           const VarLocMap &VarLocIDs) {
-  MachineFunction *MF = MI.getParent()->getParent();
+  MachineFunction *MF = MI.getMF();
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
   unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
   SparseBitVector<> KillSet;
@@ -444,14 +449,14 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI,
                                         VarLocMap &VarLocIDs,
                                         SpillMap &Spills) {
   unsigned Reg;
-  MachineFunction *MF = MI.getParent()->getParent();
+  MachineFunction *MF = MI.getMF();
   if (!isSpillInstruction(MI, MF, Reg))
     return;
 
   // Check if the register is the location of a debug value.
   for (unsigned ID : OpenRanges.getVarLocs()) {
     if (VarLocIDs[ID].isDescribedByReg() == Reg) {
-      DEBUG(dbgs() << "Spilling Register " << PrintReg(Reg, TRI) << '('
+      DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
                    << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
 
       // Create a DBG_VALUE instruction to describe the Var in its spilled
@@ -460,10 +465,11 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI,
       unsigned SpillBase;
       int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
       const MachineInstr *DMI = &VarLocIDs[ID].MI;
+      auto *SpillExpr = DIExpression::prepend(
+          DMI->getDebugExpression(), DIExpression::NoDeref, SpillOffset);
       MachineInstr *SpDMI =
-          BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, 0,
-                  DMI->getDebugVariable(), DMI->getDebugExpression());
-      SpDMI->getOperand(1).setImm(SpillOffset);
+          BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
+                  DMI->getDebugVariable(), SpillExpr);
       DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
             SpDMI->print(dbgs(), false, TII));
 
@@ -582,7 +588,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
     const MachineInstr *DMI = &DiffIt.MI;
     MachineInstr *MI =
         BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
-                DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
+                DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(),
                 DMI->getDebugVariable(), DMI->getDebugExpression());
     if (DMI->isIndirectDebugValue())
       MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
@@ -597,7 +603,6 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
 /// Calculate the liveness information for the given machine function and
 /// extend ranges across basic blocks.
 bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
-
   DEBUG(dbgs() << "\nDebug Range Extension\n");
 
   bool Changed = false;
@@ -698,10 +703,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
 }
 
 bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
-  if (!MF.getFunction()->getSubprogram())
+  if (!MF.getFunction().getSubprogram())
     // LiveDebugValues will already have removed all DBG_VALUEs.
     return false;
 
+  // Skip functions from NoDebug compilation units.
+  if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+      DICompileUnit::NoDebug)
+    return false;
+
   TRI = MF.getSubtarget().getRegisterInfo();
   TII = MF.getSubtarget().getInstrInfo();
   TFI = MF.getSubtarget().getFrameLowering();
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 0c76478af551..34572f24c181 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -20,26 +20,44 @@
 //===----------------------------------------------------------------------===//
 
 #include "LiveDebugVariables.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/IR/Value.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
 #include <memory>
 #include <utility>
 
@@ -52,6 +70,7 @@ EnableLDV("live-debug-variables", cl::init(true),
           cl::desc("Enable the live debug variables pass"), cl::Hidden);
 
 STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+
 char LiveDebugVariables::ID = 0;
 
 INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE,
@@ -68,12 +87,56 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) {
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) {
   initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
 }
 
+enum : unsigned { UndefLocNo = ~0U };
+
+/// Describes a location by number along with some flags about the original
+/// usage of the location.
+class DbgValueLocation {
+public:
+  DbgValueLocation(unsigned LocNo, bool WasIndirect)
+      : LocNo(LocNo), WasIndirect(WasIndirect) {
+    static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
+    assert(locNo() == LocNo && "location truncation");
+  }
+
+  DbgValueLocation() : LocNo(0), WasIndirect(0) {}
+
+  unsigned locNo() const {
+    // Fix up the undef location number, which gets truncated.
+    return LocNo == INT_MAX ? UndefLocNo : LocNo;
+  }
+  bool wasIndirect() const { return WasIndirect; }
+  bool isUndef() const { return locNo() == UndefLocNo; }
+
+  DbgValueLocation changeLocNo(unsigned NewLocNo) const {
+    return DbgValueLocation(NewLocNo, WasIndirect);
+  }
+
+  friend inline bool operator==(const DbgValueLocation &LHS,
+                                const DbgValueLocation &RHS) {
+    return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
+  }
+
+  friend inline bool operator!=(const DbgValueLocation &LHS,
+                                const DbgValueLocation &RHS) {
+    return !(LHS == RHS);
+  }
+
+private:
+  unsigned LocNo : 31;
+  unsigned WasIndirect : 1;
+};
+
 /// LocMap - Map of where a user value is live, and its location.
-typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+using LocMap = IntervalMap<SlotIndex, DbgValueLocation, 4>;
+
+namespace {
+
+class LDVImpl;
 
 /// UserValue - A user value is a part of a debug info user variable.
 ///
@@ -84,17 +147,13 @@ typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
 /// user values are related if they refer to the same variable, or if they are
 /// held by the same virtual register. The equivalence class is the transitive
 /// closure of that relation.
-namespace {
-class LDVImpl;
 class UserValue {
-  const MDNode *Variable;   ///< The debug info variable we are part of.
-  const MDNode *Expression; ///< Any complex address expression.
-  unsigned offset;        ///< Byte offset into variable.
-  bool IsIndirect;        ///< true if this is a register-indirect+offset value.
+  const DILocalVariable *Variable; ///< The debug info variable we are part of.
+  const DIExpression *Expression; ///< Any complex address expression.
   DebugLoc dl;            ///< The debug location for the variable. This is
                           ///< used by dwarf writer to find lexical scope.
   UserValue *leader;      ///< Equivalence class leader.
-  UserValue *next;        ///< Next value in equivalence class, or null.
+  UserValue *next = nullptr; ///< Next value in equivalence class, or null.
 
   /// Numbered locations referenced by locmap.
   SmallVector<MachineOperand, 4> locations;
@@ -102,14 +161,16 @@ class UserValue {
   /// Map of slot indices where this value is live.
   LocMap locInts;
 
-  /// coalesceLocation - After LocNo was changed, check if it has become
-  /// identical to another location, and coalesce them. This may cause LocNo or
-  /// a later location to be erased, but no earlier location will be erased.
-  void coalesceLocation(unsigned LocNo);
+  /// Set of interval start indexes that have been trimmed to the
+  /// lexical scope.
+  SmallSet<SlotIndex, 2> trimmedDefs;
 
   /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
-  void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
-                        LiveIntervals &LIS, const TargetInstrInfo &TII);
+  void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
+                        SlotIndex StopIdx,
+                        DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS,
+                        const TargetInstrInfo &TII,
+                        const TargetRegisterInfo &TRI);
 
   /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
   /// is live. Returns true if any changes were made.
@@ -118,10 +179,10 @@ class UserValue {
 
 public:
   /// UserValue - Create a new UserValue.
-  UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i,
-            DebugLoc L, LocMap::Allocator &alloc)
-      : Variable(var), Expression(expr), offset(o), IsIndirect(i),
-        dl(std::move(L)), leader(this), next(nullptr), locInts(alloc) {}
+  UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
+            LocMap::Allocator &alloc)
+      : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
+        locInts(alloc) {}
 
   /// getLeader - Get the leader of this value's equivalence class.
   UserValue *getLeader() {
@@ -135,10 +196,11 @@ public:
   UserValue *getNext() const { return next; }
 
   /// match - Does this UserValue match the parameters?
-  bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA,
-             unsigned Offset, bool indirect) const {
-    return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA &&
-           Offset == offset && indirect == IsIndirect;
+  bool match(const DILocalVariable *Var, const DIExpression *Expr,
+             const DILocation *IA) const {
+    // FIXME: The fragment should be part of the equivalence class, but not
+    // other things in the expression like stack values.
+    return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
   }
 
   /// merge - Merge equivalence classes.
@@ -165,7 +227,7 @@ public:
   unsigned getLocationNo(const MachineOperand &LocMO) {
     if (LocMO.isReg()) {
       if (LocMO.getReg() == 0)
-        return ~0u;
+        return UndefLocNo;
       // For register locations we dont care about use/def and other flags.
       for (unsigned i = 0, e = locations.size(); i != e; ++i)
         if (locations[i].isReg() &&
@@ -189,14 +251,15 @@ public:
   void mapVirtRegs(LDVImpl *LDV);
 
   /// addDef - Add a definition point to this value.
-  void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+  void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
+    DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
     // Add a singular (Idx,Idx) -> Loc mapping.
     LocMap::iterator I = locInts.find(Idx);
     if (!I.valid() || I.start() != Idx)
-      I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+      I.insert(Idx, Idx.getNextSlot(), Loc);
     else
       // A later DBG_VALUE at the same SlotIndex overrides the old location.
-      I.setValue(getLocationNo(LocMO));
+      I.setValue(Loc);
   }
 
   /// extendDef - Extend the current definition as far as possible down.
@@ -204,12 +267,12 @@ public:
   /// range of VNI.
   /// End points where VNI is no longer live are added to Kills.
   /// @param Idx   Starting point for the definition.
-  /// @param LocNo Location number to propagate.
+  /// @param Loc   Location number to propagate.
   /// @param LR    Restrict liveness to where LR has the value VNI. May be null.
   /// @param VNI   When LR is not null, this is the value to restrict to.
   /// @param Kills Append end points of VNI's live range to Kills.
   /// @param LIS   Live intervals analysis.
-  void extendDef(SlotIndex Idx, unsigned LocNo,
+  void extendDef(SlotIndex Idx, DbgValueLocation Loc,
                  LiveRange *LR, const VNInfo *VNI,
                  SmallVectorImpl<SlotIndex> *Kills,
                  LiveIntervals &LIS);
@@ -219,18 +282,19 @@ public:
   /// points, and add defs if possible.
   /// @param LI      Scan for copies of the value in LI->reg.
   /// @param LocNo   Location number of LI->reg.
+  /// @param WasIndirect Indicates if the original use of LI->reg was indirect
   /// @param Kills   Points where the range of LocNo could be extended.
   /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
-  void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
-                      const SmallVectorImpl<SlotIndex> &Kills,
-                      SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
-                      MachineRegisterInfo &MRI,
-                      LiveIntervals &LIS);
+  void addDefsFromCopies(
+      LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+      const SmallVectorImpl<SlotIndex> &Kills,
+      SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
+      MachineRegisterInfo &MRI, LiveIntervals &LIS);
 
   /// computeIntervals - Compute the live intervals of all locations after
   /// collecting all their def points.
   void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
-                        LiveIntervals &LIS);
+                        LiveIntervals &LIS, LexicalScopes &LS);
 
   /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
   /// live. Returns true if any changes were made.
@@ -238,47 +302,50 @@ public:
                      LiveIntervals &LIS);
 
   /// rewriteLocations - Rewrite virtual register locations according to the
-  /// provided virtual register map.
-  void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+  /// provided virtual register map. Record which locations were spilled.
+  void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
+                        BitVector &SpilledLocations);
 
   /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
-  void emitDebugValues(VirtRegMap *VRM,
-                       LiveIntervals &LIS, const TargetInstrInfo &TRI);
+  void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+                       const TargetInstrInfo &TII,
+                       const TargetRegisterInfo &TRI,
+                       const BitVector &SpilledLocations);
 
   /// getDebugLoc - Return DebugLoc of this UserValue.
   DebugLoc getDebugLoc() { return dl;}
+
   void print(raw_ostream &, const TargetRegisterInfo *);
 };
-} // namespace
 
 /// LDVImpl - Implementation of the LiveDebugVariables pass.
-namespace {
 class LDVImpl {
   LiveDebugVariables &pass;
   LocMap::Allocator allocator;
-  MachineFunction *MF;
+  MachineFunction *MF = nullptr;
   LiveIntervals *LIS;
   const TargetRegisterInfo *TRI;
 
   /// Whether emitDebugValues is called.
-  bool EmitDone;
+  bool EmitDone = false;
+
   /// Whether the machine function is modified during the pass.
-  bool ModifiedMF;
+  bool ModifiedMF = false;
 
   /// userValues - All allocated UserValue instances.
   SmallVector<std::unique_ptr<UserValue>, 8> userValues;
 
   /// Map virtual register to eq class leader.
-  typedef DenseMap<unsigned, UserValue*> VRMap;
+  using VRMap = DenseMap<unsigned, UserValue *>;
   VRMap virtRegToEqClass;
 
   /// Map user variable to eq class leader.
-  typedef DenseMap<const MDNode *, UserValue*> UVMap;
+  using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
   UVMap userVarMap;
 
   /// getUserValue - Find or create a UserValue.
-  UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
-                          unsigned Offset, bool IsIndirect, const DebugLoc &DL);
+  UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
+                          const DebugLoc &DL);
 
   /// lookupVirtReg - Find the EC leader for VirtReg or null.
   UserValue *lookupVirtReg(unsigned VirtReg);
@@ -300,8 +367,8 @@ class LDVImpl {
   void computeIntervals();
 
 public:
-  LDVImpl(LiveDebugVariables *ps)
-      : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
+  LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+
   bool runOnMachineFunction(MachineFunction &mf);
 
   /// clear - Release all memory.
@@ -328,8 +395,10 @@ public:
 
   void print(raw_ostream&);
 };
-} // namespace
 
+} // end anonymous namespace
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
                           const LLVMContext &Ctx) {
   if (!DL)
@@ -372,14 +441,15 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
   printExtendedName(OS, DV, dl);
 
   OS << "\"\t";
-  if (offset)
-    OS << '+' << offset;
   for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
     OS << " [" << I.start() << ';' << I.stop() << "):";
-    if (I.value() == ~0u)
+    if (I.value().isUndef())
       OS << "undef";
-    else
-      OS << I.value();
+    else {
+      OS << I.value().locNo();
+      if (I.value().wasIndirect())
+        OS << " ind";
+    }
   }
   for (unsigned i = 0, e = locations.size(); i != e; ++i) {
     OS << " Loc" << i << '=';
@@ -393,34 +463,7 @@ void LDVImpl::print(raw_ostream &OS) {
   for (unsigned i = 0, e = userValues.size(); i != e; ++i)
     userValues[i]->print(OS, TRI);
 }
-
-void UserValue::coalesceLocation(unsigned LocNo) {
-  unsigned KeepLoc = 0;
-  for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
-    if (KeepLoc == LocNo)
-      continue;
-    if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
-      break;
-  }
-  // No matches.
-  if (KeepLoc == locations.size())
-    return;
-
-  // Keep the smaller location, erase the larger one.
-  unsigned EraseLoc = LocNo;
-  if (KeepLoc > EraseLoc)
-    std::swap(KeepLoc, EraseLoc);
-  locations.erase(locations.begin() + EraseLoc);
-
-  // Rewrite values.
-  for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
-    unsigned v = I.value();
-    if (v == EraseLoc)
-      I.setValue(KeepLoc);      // Coalesce when possible.
-    else if (v > EraseLoc)
-      I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
-  }
-}
+#endif
 
 void UserValue::mapVirtRegs(LDVImpl *LDV) {
   for (unsigned i = 0, e = locations.size(); i != e; ++i)
@@ -429,20 +472,19 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
       LDV->mapVirtReg(locations[i].getReg(), this);
 }
 
-UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
-                                 unsigned Offset, bool IsIndirect,
-                                 const DebugLoc &DL) {
+UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
+                                 const DIExpression *Expr, const DebugLoc &DL) {
   UserValue *&Leader = userVarMap[Var];
   if (Leader) {
     UserValue *UV = Leader->getLeader();
     Leader = UV;
     for (; UV; UV = UV->getNext())
-      if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect))
+      if (UV->match(Var, Expr, DL->getInlinedAt()))
         return UV;
   }
 
   userValues.push_back(
-      make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator));
+      llvm::make_unique<UserValue>(Var, Expr, DL, allocator));
   UserValue *UV = userValues.back().get();
   Leader = UserValue::merge(Leader, UV);
   return UV;
@@ -469,14 +511,15 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
     return false;
   }
 
-  // Get or create the UserValue for (variable,offset).
-  bool IsIndirect = MI.isIndirectDebugValue();
-  unsigned Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
-  const MDNode *Var = MI.getDebugVariable();
-  const MDNode *Expr = MI.getDebugExpression();
-  //here.
-  UserValue *UV = getUserValue(Var, Expr, Offset, IsIndirect, MI.getDebugLoc());
-  UV->addDef(Idx, MI.getOperand(0));
+  // Get or create the UserValue for (variable,offset) here.
+  bool IsIndirect = MI.getOperand(1).isImm();
+  if (IsIndirect)
+    assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+  const DILocalVariable *Var = MI.getDebugVariable();
+  const DIExpression *Expr = MI.getDebugExpression();
+  UserValue *UV =
+      getUserValue(Var, Expr, MI.getDebugLoc());
+  UV->addDef(Idx, MI.getOperand(0), IsIndirect);
   return true;
 }
 
@@ -511,7 +554,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
 
 /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
 /// data-flow analysis to propagate them beyond basic block boundaries.
-void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
+void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
                           const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
                           LiveIntervals &LIS) {
   SlotIndex Start = Idx;
@@ -538,7 +581,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
   if (I.valid() && I.start() <= Start) {
     // Stop when meeting a different location or an already extended interval.
     Start = Start.getNextSlot();
-    if (I.value() != LocNo || I.stop() != Start)
+    if (I.value() != Loc || I.stop() != Start)
       return;
     // This is a one-slot placeholder. Just skip it.
     ++I;
@@ -554,14 +597,14 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
     Kills->push_back(Stop);
 
   if (Start < Stop)
-    I.insert(Start, Stop, LocNo);
+    I.insert(Start, Stop, Loc);
 }
 
-void
-UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
-                      const SmallVectorImpl<SlotIndex> &Kills,
-                      SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
-                      MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+void UserValue::addDefsFromCopies(
+    LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+    const SmallVectorImpl<SlotIndex> &Kills,
+    SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
+    MachineRegisterInfo &MRI, LiveIntervals &LIS) {
   if (Kills.empty())
     return;
   // Don't track copies from physregs, there are too many uses.
@@ -588,7 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
     // it, or we are looking at a wrong value of LI.
     SlotIndex Idx = LIS.getInstructionIndex(*MI);
     LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
-    if (!I.valid() || I.value() != LocNo)
+    if (!I.valid() || I.value().locNo() != LocNo)
       continue;
 
     if (!LIS.hasInterval(DstReg))
@@ -621,69 +664,141 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
       MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
       assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
       unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
-      I.insert(Idx, Idx.getNextSlot(), LocNo);
-      NewDefs.push_back(std::make_pair(Idx, LocNo));
+      DbgValueLocation NewLoc(LocNo, WasIndirect);
+      I.insert(Idx, Idx.getNextSlot(), NewLoc);
+      NewDefs.push_back(std::make_pair(Idx, NewLoc));
       break;
     }
   }
 }
 
-void
-UserValue::computeIntervals(MachineRegisterInfo &MRI,
-                            const TargetRegisterInfo &TRI,
-                            LiveIntervals &LIS) {
-  SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+void UserValue::computeIntervals(MachineRegisterInfo &MRI,
+                                 const TargetRegisterInfo &TRI,
+                                 LiveIntervals &LIS, LexicalScopes &LS) {
+  SmallVector<std::pair<SlotIndex, DbgValueLocation>, 16> Defs;
 
   // Collect all defs to be extended (Skipping undefs).
   for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
-    if (I.value() != ~0u)
+    if (!I.value().isUndef())
       Defs.push_back(std::make_pair(I.start(), I.value()));
 
   // Extend all defs, and possibly add new ones along the way.
   for (unsigned i = 0; i != Defs.size(); ++i) {
     SlotIndex Idx = Defs[i].first;
-    unsigned LocNo = Defs[i].second;
-    const MachineOperand &Loc = locations[LocNo];
+    DbgValueLocation Loc = Defs[i].second;
+    const MachineOperand &LocMO = locations[Loc.locNo()];
 
-    if (!Loc.isReg()) {
-      extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS);
+    if (!LocMO.isReg()) {
+      extendDef(Idx, Loc, nullptr, nullptr, nullptr, LIS);
       continue;
     }
 
     // Register locations are constrained to where the register value is live.
-    if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+    if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) {
       LiveInterval *LI = nullptr;
       const VNInfo *VNI = nullptr;
-      if (LIS.hasInterval(Loc.getReg())) {
-        LI = &LIS.getInterval(Loc.getReg());
+      if (LIS.hasInterval(LocMO.getReg())) {
+        LI = &LIS.getInterval(LocMO.getReg());
         VNI = LI->getVNInfoAt(Idx);
       }
       SmallVector<SlotIndex, 16> Kills;
-      extendDef(Idx, LocNo, LI, VNI, &Kills, LIS);
+      extendDef(Idx, Loc, LI, VNI, &Kills, LIS);
       if (LI)
-        addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+        addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
+                          LIS);
       continue;
     }
 
-    // For physregs, use the live range of the first regunit as a guide.
-    unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
-    LiveRange *LR = &LIS.getRegUnit(Unit);
-    const VNInfo *VNI = LR->getVNInfoAt(Idx);
-    // Don't track copies from physregs, it is too expensive.
-    extendDef(Idx, LocNo, LR, VNI, nullptr, LIS);
+    // For physregs, we only mark the start slot idx. DwarfDebug will see it
+    // as if the DBG_VALUE is valid up until the end of the basic block, or
+    // the next def of the physical register. So we do not need to extend the
+    // range. It might actually happen that the DBG_VALUE is the last use of
+    // the physical register (e.g. if this is an unused input argument to a
+    // function).
   }
 
-  // Finally, erase all the undefs.
+  // Erase all the undefs.
   for (LocMap::iterator I = locInts.begin(); I.valid();)
-    if (I.value() == ~0u)
+    if (I.value().isUndef())
       I.erase();
     else
       ++I;
+
+  // The computed intervals may extend beyond the range of the debug
+  // location's lexical scope. In this case, splitting of an interval
+  // can result in an interval outside of the scope being created,
+  // causing extra unnecessary DBG_VALUEs to be emitted. To prevent
+  // this, trim the intervals to the lexical scope.
+
+  LexicalScope *Scope = LS.findLexicalScope(dl);
+  if (!Scope)
+    return;
+
+  SlotIndex PrevEnd;
+  LocMap::iterator I = locInts.begin();
+
+  // Iterate over the lexical scope ranges. Each time round the loop
+  // we check the intervals for overlap with the end of the previous
+  // range and the start of the next. The first range is handled as
+  // a special case where there is no PrevEnd.
+  for (const InsnRange &Range : Scope->getRanges()) {
+    SlotIndex RStart = LIS.getInstructionIndex(*Range.first);
+    SlotIndex REnd = LIS.getInstructionIndex(*Range.second);
+
+    // At the start of each iteration I has been advanced so that
+    // I.stop() >= PrevEnd. Check for overlap.
+    if (PrevEnd && I.start() < PrevEnd) {
+      SlotIndex IStop = I.stop();
+      DbgValueLocation Loc = I.value();
+
+      // Stop overlaps previous end - trim the end of the interval to
+      // the scope range.
+      I.setStopUnchecked(PrevEnd);
+      ++I;
+
+      // If the interval also overlaps the start of the "next" (i.e.
+      // current) range create a new interval for the remainder (which
+      // may be further trimmed).
+      if (RStart < IStop)
+        I.insert(RStart, IStop, Loc);
+    }
+
+    // Advance I so that I.stop() >= RStart, and check for overlap.
+    I.advanceTo(RStart);
+    if (!I.valid())
+      return;
+
+    if (I.start() < RStart) {
+      // Interval start overlaps range - trim to the scope range.
+      I.setStartUnchecked(RStart);
+      // Remember that this interval was trimmed.
+      trimmedDefs.insert(RStart);
+    }
+
+    // The end of a lexical scope range is the last instruction in the
+    // range. To convert to an interval we need the index of the
+    // instruction after it.
+    REnd = REnd.getNextIndex();
+
+    // Advance I to first interval outside current range.
+    I.advanceTo(REnd);
+    if (!I.valid())
+      return;
+
+    PrevEnd = REnd;
+  }
+
+  // Check for overlap with end of final range.
+  if (PrevEnd && I.start() < PrevEnd)
+    I.setStopUnchecked(PrevEnd);
 }
 
 void LDVImpl::computeIntervals() {
+  LexicalScopes LS;
+  LS.initialize(*MF);
+
   for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
-    userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS);
+    userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, LS);
     userValues[i]->mapVirtRegs(this);
   }
 }
@@ -718,7 +833,7 @@ static void removeDebugValues(MachineFunction &mf) {
 bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
   if (!EnableLDV)
     return false;
-  if (!mf.getFunction()->getSubprogram()) {
+  if (!mf.getFunction().getSubprogram()) {
     removeDebugValues(mf);
     return false;
   }
@@ -757,7 +872,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
       continue;
 
     // Don't allocate the new LocNo until it is needed.
-    unsigned NewLocNo = ~0u;
+    unsigned NewLocNo = UndefLocNo;
 
     // Iterate over the overlaps between locInts and LI.
     LocMapI.find(LI->beginIndex());
@@ -772,9 +887,9 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
         break;
 
       // Now LII->end > LocMapI.start(). Do we have an overlap?
-      if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+      if (LocMapI.value().locNo() == OldLocNo && LII->start < LocMapI.stop()) {
         // Overlapping correct location. Allocate NewLocNo now.
-        if (NewLocNo == ~0u) {
+        if (NewLocNo == UndefLocNo) {
           MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
           MO.setSubReg(locations[OldLocNo].getSubReg());
           NewLocNo = getLocationNo(MO);
@@ -783,6 +898,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
 
         SlotIndex LStart = LocMapI.start();
         SlotIndex LStop  = LocMapI.stop();
+        DbgValueLocation OldLoc = LocMapI.value();
 
         // Trim LocMapI down to the LII overlap.
         if (LStart < LII->start)
@@ -791,17 +907,17 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
           LocMapI.setStopUnchecked(LII->end);
 
         // Change the value in the overlap. This may trigger coalescing.
-        LocMapI.setValue(NewLocNo);
+        LocMapI.setValue(OldLoc.changeLocNo(NewLocNo));
 
         // Re-insert any removed OldLocNo ranges.
         if (LStart < LocMapI.start()) {
-          LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+          LocMapI.insert(LStart, LocMapI.start(), OldLoc);
           ++LocMapI;
           assert(LocMapI.valid() && "Unexpected coalescing");
         }
         if (LStop > LocMapI.stop()) {
           ++LocMapI;
-          LocMapI.insert(LII->end, LStop, OldLocNo);
+          LocMapI.insert(LII->end, LStop, OldLoc);
           --LocMapI;
         }
       }
@@ -824,14 +940,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
   locations.erase(locations.begin() + OldLocNo);
   LocMapI.goToBegin();
   while (LocMapI.valid()) {
-    unsigned v = LocMapI.value();
-    if (v == OldLocNo) {
+    DbgValueLocation v = LocMapI.value();
+    if (v.locNo() == OldLocNo) {
       DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
                    << LocMapI.stop() << ")\n");
       LocMapI.erase();
     } else {
-      if (v > OldLocNo)
-        LocMapI.setValueUnchecked(v-1);
+      if (v.locNo() > OldLocNo)
+        LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1));
       ++LocMapI;
     }
   }
@@ -876,36 +992,73 @@ splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
     static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
 }
 
-void
-UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
-  // Iterate over locations in reverse makes it easier to handle coalescing.
-  for (unsigned i = locations.size(); i ; --i) {
-    unsigned LocNo = i-1;
-    MachineOperand &Loc = locations[LocNo];
+void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,
+                                 BitVector &SpilledLocations) {
+  // Build a set of new locations with new numbers so we can coalesce our
+  // IntervalMap if two vreg intervals collapse to the same physical location.
+  // Use MapVector instead of SetVector because MapVector::insert returns the
+  // position of the previously or newly inserted element. The boolean value
+  // tracks if the location was produced by a spill.
+  // FIXME: This will be problematic if we ever support direct and indirect
+  // frame index locations, i.e. expressing both variables in memory and
+  // 'int x, *px = &x'. The "spilled" bit must become part of the location.
+  MapVector<MachineOperand, bool> NewLocations;
+  SmallVector<unsigned, 4> LocNoMap(locations.size());
+  for (unsigned I = 0, E = locations.size(); I != E; ++I) {
+    bool Spilled = false;
+    MachineOperand Loc = locations[I];
     // Only virtual registers are rewritten.
-    if (!Loc.isReg() || !Loc.getReg() ||
-        !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
-      continue;
-    unsigned VirtReg = Loc.getReg();
-    if (VRM.isAssignedReg(VirtReg) &&
-        TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
-      // This can create a %noreg operand in rare cases when the sub-register
-      // index is no longer available. That means the user value is in a
-      // non-existent sub-register, and %noreg is exactly what we want.
-      Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
-    } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
-      // FIXME: Translate SubIdx to a stackslot offset.
-      Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
-    } else {
-      Loc.setReg(0);
-      Loc.setSubReg(0);
+    if (Loc.isReg() && Loc.getReg() &&
+        TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+      unsigned VirtReg = Loc.getReg();
+      if (VRM.isAssignedReg(VirtReg) &&
+          TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+        // This can create a %noreg operand in rare cases when the sub-register
+        // index is no longer available. That means the user value is in a
+        // non-existent sub-register, and %noreg is exactly what we want.
+        Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+      } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+        // FIXME: Translate SubIdx to a stackslot offset.
+        Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+        Spilled = true;
+      } else {
+        Loc.setReg(0);
+        Loc.setSubReg(0);
+      }
+    }
+
+    // Insert this location if it doesn't already exist and record a mapping
+    // from the old number to the new number.
+    auto InsertResult = NewLocations.insert({Loc, Spilled});
+    unsigned NewLocNo = std::distance(NewLocations.begin(), InsertResult.first);
+    LocNoMap[I] = NewLocNo;
+  }
+
+  // Rewrite the locations and record which ones were spill slots.
+  locations.clear();
+  SpilledLocations.clear();
+  SpilledLocations.resize(NewLocations.size());
+  for (auto &Pair : NewLocations) {
+    locations.push_back(Pair.first);
+    if (Pair.second) {
+      unsigned NewLocNo = std::distance(&*NewLocations.begin(), &Pair);
+      SpilledLocations.set(NewLocNo);
     }
-    coalesceLocation(LocNo);
+  }
+
+  // Update the interval map, but only coalesce left, since intervals to the
+  // right use the old location numbers. This should merge two contiguous
+  // DBG_VALUE intervals with different vregs that were allocated to the same
+  // physical register.
+  for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+    DbgValueLocation Loc = I.value();
+    unsigned NewLocNo = LocNoMap[Loc.locNo()];
+    I.setValueUnchecked(Loc.changeLocNo(NewLocNo));
+    I.setStart(I.start());
   }
 }
 
-/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
-/// instruction.
+/// Find an iterator for inserting a DBG_VALUE instruction.
 static MachineBasicBlock::iterator
 findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
                    LiveIntervals &LIS) {
@@ -928,52 +1081,111 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
                               std::next(MachineBasicBlock::iterator(MI));
 }
 
-void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
-                                 unsigned LocNo,
+/// Find an iterator for inserting the next DBG_VALUE instruction
+/// (or end if no more insert locations found).
+static MachineBasicBlock::iterator
+findNextInsertLocation(MachineBasicBlock *MBB,
+                       MachineBasicBlock::iterator I,
+                       SlotIndex StopIdx, MachineOperand &LocMO,
+                       LiveIntervals &LIS,
+                       const TargetRegisterInfo &TRI) {
+  if (!LocMO.isReg())
+    return MBB->instr_end();
+  unsigned Reg = LocMO.getReg();
+
+  // Find the next instruction in the MBB that define the register Reg.
+  while (I != MBB->end()) {
+    if (!LIS.isNotInMIMap(*I) &&
+        SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I)))
+      break;
+    if (I->definesRegister(Reg, &TRI))
+      // The insert location is directly after the instruction/bundle.
+      return std::next(I);
+    ++I;
+  }
+  return MBB->end();
+}
+
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
+                                 SlotIndex StopIdx,
+                                 DbgValueLocation Loc, bool Spilled,
                                  LiveIntervals &LIS,
-                                 const TargetInstrInfo &TII) {
-  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
-  MachineOperand &Loc = locations[LocNo];
+                                 const TargetInstrInfo &TII,
+                                 const TargetRegisterInfo &TRI) {
+  SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB);
+  // Only search within the current MBB.
+  StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx;
+  MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS);
+  MachineOperand &MO = locations[Loc.locNo()];
   ++NumInsertedDebugValues;
 
   assert(cast<DILocalVariable>(Variable)
              ->isValidLocationForIntrinsic(getDebugLoc()) &&
          "Expected inlined-at fields to agree");
-  if (Loc.isReg())
-    BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
-            IsIndirect, Loc.getReg(), offset, Variable, Expression);
-  else
-    BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
-        .add(Loc)
-        .addImm(offset)
-        .addMetadata(Variable)
-        .addMetadata(Expression);
+
+  // If the location was spilled, the new DBG_VALUE will be indirect. If the
+  // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate
+  // that the original virtual register was a pointer.
+  const DIExpression *Expr = Expression;
+  bool IsIndirect = Loc.wasIndirect();
+  if (Spilled) {
+    if (IsIndirect)
+      Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+    IsIndirect = true;
+  }
+
+  assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
+
+  do {
+    MachineInstrBuilder MIB =
+      BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+          .add(MO);
+    if (IsIndirect)
+      MIB.addImm(0U);
+    else
+      MIB.addReg(0U, RegState::Debug);
+    MIB.addMetadata(Variable).addMetadata(Expr);
+
+    // Continue and insert DBG_VALUES after every redefinition of register
+    // associated with the debug value within the range
+    I = findNextInsertLocation(MBB, I, StopIdx, MO, LIS, TRI);
+  } while (I != MBB->end());
 }
 
 void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
-                                const TargetInstrInfo &TII) {
+                                const TargetInstrInfo &TII,
+                                const TargetRegisterInfo &TRI,
+                                const BitVector &SpilledLocations) {
   MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
 
   for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
     SlotIndex Start = I.start();
     SlotIndex Stop = I.stop();
-    unsigned LocNo = I.value();
-    DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+    DbgValueLocation Loc = I.value();
+    bool Spilled = !Loc.isUndef() ? SpilledLocations.test(Loc.locNo()) : false;
+
+    // If the interval start was trimmed to the lexical scope insert the
+    // DBG_VALUE at the previous index (otherwise it appears after the
+    // first instruction in the range).
+    if (trimmedDefs.count(Start))
+      Start = Start.getPrevIndex();
+
+    DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
     MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
     SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
 
-    DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
-    insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
+    DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+    insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
     // This interval may span multiple basic blocks.
     // Insert a DBG_VALUE into each one.
-    while(Stop > MBBEnd) {
+    while (Stop > MBBEnd) {
       // Move to the next block.
       Start = MBBEnd;
       if (++MBB == MFEnd)
         break;
       MBBEnd = LIS.getMBBEndIdx(&*MBB);
-      DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
-      insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
+      DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);
+      insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);
     }
     DEBUG(dbgs() << '\n');
     if (MBB == MFEnd)
@@ -988,10 +1200,11 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
   if (!MF)
     return;
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  BitVector SpilledLocations;
   for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
     DEBUG(userValues[i]->print(dbgs(), TRI));
-    userValues[i]->rewriteLocations(*VRM, *TRI);
-    userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+    userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);
+    userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);
   }
   EmitDone = true;
 }
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 1d7e3d4371a2..aa35880b063a 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -1,4 +1,4 @@
-//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -22,17 +22,16 @@
 #define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
 
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Compiler.h"
 
 namespace llvm {
 
 template <typename T> class ArrayRef;
-class LiveInterval;
 class LiveIntervals;
 class VirtRegMap;
 
 class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
-  void *pImpl;
+  void *pImpl = nullptr;
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -62,14 +61,12 @@ public:
   void dump() const;
 
 private:
-
   bool runOnMachineFunction(MachineFunction &) override;
   void releaseMemory() override;
   void getAnalysisUsage(AnalysisUsage &) const override;
   bool doInitialization(Module &) override;
-
 };
 
-} // namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 9ef9f238fdce..302c75133e35 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1,4 +1,4 @@
-//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//===- LiveInterval.cpp - Live Interval Representation --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,20 +19,34 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveInterval.h"
-
 #include "LiveRangeUtils.h"
 #include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <utility>
+
 using namespace llvm;
 
 namespace {
+
 //===----------------------------------------------------------------------===//
 // Implementation of various methods necessary for calculation of live ranges.
 // The implementation of the methods abstracts from the concrete type of the
@@ -56,8 +70,8 @@ protected:
   CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {}
 
 public:
-  typedef LiveRange::Segment Segment;
-  typedef IteratorT iterator;
+  using Segment = LiveRange::Segment;
+  using iterator = IteratorT;
 
   /// A counterpart of LiveRange::createDeadDef: Make sure the range has a
   /// value defined at @p Def.
@@ -265,8 +279,9 @@ private:
 //===----------------------------------------------------------------------===//
 
 class CalcLiveRangeUtilVector;
-typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
-                              LiveRange::Segments> CalcLiveRangeUtilVectorBase;
+using CalcLiveRangeUtilVectorBase =
+    CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
+                          LiveRange::Segments>;
 
 class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase {
 public:
@@ -292,9 +307,9 @@ private:
 //===----------------------------------------------------------------------===//
 
 class CalcLiveRangeUtilSet;
-typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet,
-                              LiveRange::SegmentSet::iterator,
-                              LiveRange::SegmentSet> CalcLiveRangeUtilSetBase;
+using CalcLiveRangeUtilSetBase =
+    CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, LiveRange::SegmentSet::iterator,
+                          LiveRange::SegmentSet>;
 
 class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase {
 public:
@@ -327,7 +342,8 @@ private:
     return I;
   }
 };
-} // namespace
+
+} // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
 //   LiveRange methods
@@ -444,7 +460,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
   if (J == JE)
     return false;
 
-  for (;;) {
+  while (true) {
     // J has just been advanced to satisfy:
     assert(J->end >= I->start);
     // Check for an overlap.
@@ -865,7 +881,6 @@ void LiveInterval::clearSubRanges() {
 
 void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
     LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
-
   LaneBitmask ToApply = LaneMask;
   for (SubRange &SR : subranges()) {
     LaneBitmask SRMask = SR.LaneMask;
@@ -925,8 +940,8 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
   }
 }
 
-raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
-  return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
+raw_ostream& llvm::operator<<(raw_ostream& OS, const LiveRange::Segment &S) {
+  return OS << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -971,7 +986,7 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const {
 }
 
 void LiveInterval::print(raw_ostream &OS) const {
-  OS << PrintReg(reg) << ' ';
+  OS << printReg(reg) << ' ';
   super::print(OS);
   // Print subranges
   for (const SubRange &SR : subranges())
@@ -1033,7 +1048,6 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
 }
 #endif
 
-
 //===----------------------------------------------------------------------===//
 //                           LiveRangeUpdater class
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index b3248e53d0a5..3e742a6c2f21 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -17,8 +17,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cassert>
 #include <cstdlib>
 
@@ -87,7 +87,7 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
   }
   for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
     OS << " [" << SI.start() << ' ' << SI.stop() << "):"
-       << PrintReg(SI.value()->reg, TRI);
+       << printReg(SI.value()->reg, TRI);
   }
   OS << '\n';
 }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervals.cpp
index 0e240f482a19..79fdba7e062a 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -1,4 +1,4 @@
-//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===//
+//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,7 +14,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "LiveRangeCalc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
@@ -34,6 +34,8 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -44,8 +46,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -157,7 +157,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
   // Dump the regunits.
   for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit)
     if (LiveRange *LR = RegUnitRanges[Unit])
-      OS << PrintRegUnit(Unit, TRI) << ' ' << *LR << '\n';
+      OS << printRegUnit(Unit, TRI) << ' ' << *LR << '\n';
 
   // Dump the virtregs.
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
@@ -323,7 +323,7 @@ void LiveIntervals::computeLiveInRegUnits() {
 
     // Create phi-defs at Begin for all live-in registers.
     SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
-    DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber());
+    DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
     for (const auto &LI : MBB.liveins()) {
       for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
         unsigned Unit = *Units;
@@ -335,7 +335,7 @@ void LiveIntervals::computeLiveInRegUnits() {
         }
         VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
         (void)VNI;
-        DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+        DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id);
       }
     }
     DEBUG(dbgs() << '\n');
@@ -698,11 +698,11 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
       // Check if any of the regunits are live beyond the end of RI. That could
       // happen when a physreg is defined as a copy of a virtreg:
       //
-      //   %EAX = COPY %vreg5
-      //   FOO %vreg5         <--- MI, cancel kill because %EAX is live.
-      //   BAR %EAX<kill>
+      //   %eax = COPY %5
+      //   FOO %5             <--- MI, cancel kill because %eax is live.
+      //   BAR killed %eax
       //
-      // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+      // There should be no kill flag on FOO when %5 is rewritten as %eax.
       for (auto &RUP : RU) {
         const LiveRange &RURange = *RUP.first;
         LiveRange::const_iterator &I = RUP.second;
@@ -719,13 +719,13 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         // When reading a partial undefined value we must not add a kill flag.
         // The regalloc might have used the undef lane for something else.
         // Example:
-        //     %vreg1 = ...              ; R32: %vreg1
-        //     %vreg2:high16 = ...       ; R64: %vreg2
-        //        = read %vreg2<kill>    ; R64: %vreg2
-        //        = read %vreg1          ; R32: %vreg1
-        // The <kill> flag is correct for %vreg2, but the register allocator may
-        // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0
-        // are actually never written by %vreg2. After assignment the <kill>
+        //     %1 = ...                  ; R32: %1
+        //     %2:high16 = ...           ; R64: %2
+        //        = read killed %2        ; R64: %2
+        //        = read %1              ; R32: %1
+        // The <kill> flag is correct for %2, but the register allocator may
+        // assign R0L to %1, and R0 to %2 because the low 32bits of R0
+        // are actually never written by %2. After assignment the <kill>
         // flag at the read instruction is invalid.
         LaneBitmask DefinedLanesMask;
         if (!SRs.empty()) {
@@ -824,7 +824,13 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
 float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
                                     const MachineBlockFrequencyInfo *MBFI,
                                     const MachineInstr &MI) {
-  BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent());
+  return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
+}
+
+float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+                                    const MachineBlockFrequencyInfo *MBFI,
+                                    const MachineBasicBlock *MBB) {
+  BlockFrequency Freq = MBFI->getBlockFreq(MBB);
   const float Scale = 1.0f / MBFI->getEntryFreq();
   return (isDef + isUse) * (Freq.getFrequency() * Scale);
 }
@@ -989,11 +995,11 @@ private:
     DEBUG({
       dbgs() << "     ";
       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-        dbgs() << PrintReg(Reg);
+        dbgs() << printReg(Reg);
         if (LaneMask.any())
           dbgs() << " L" << PrintLaneMask(LaneMask);
       } else {
-        dbgs() << PrintRegUnit(Reg, &TRI);
+        dbgs() << printRegUnit(Reg, &TRI);
       }
       dbgs() << ":\t" << LR << '\n';
     });
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index cde6ccd29dfd..f4b43a9b8ead 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -40,10 +40,8 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
   }
 }
 
-/// Simulates liveness when stepping backwards over an instruction(bundle):
-/// Remove Defs, add uses. This is the recommended way of calculating liveness.
-void LivePhysRegs::stepBackward(const MachineInstr &MI) {
-  // Remove defined registers and regmask kills from the set.
+/// Remove defined registers and regmask kills from the set.
+void LivePhysRegs::removeDefs(const MachineInstr &MI) {
   for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
     if (O->isReg()) {
       if (!O->isDef())
@@ -55,8 +53,10 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
     } else if (O->isRegMask())
       removeRegsInMask(*O);
   }
+}
 
-  // Add uses to the set.
+/// Add uses to the set.
+void LivePhysRegs::addUses(const MachineInstr &MI) {
   for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
     if (!O->isReg() || !O->readsReg())
       continue;
@@ -67,6 +67,16 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
   }
 }
 
+/// Simulates liveness when stepping backwards over an instruction(bundle):
+/// Remove Defs, add uses. This is the recommended way of calculating liveness.
+void LivePhysRegs::stepBackward(const MachineInstr &MI) {
+  // Remove defined registers and regmask kills from the set.
+  removeDefs(MI);
+
+  // Add uses to the set.
+  addUses(MI);
+}
+
 /// Simulates liveness when stepping forward over an instruction(bundle): Remove
 /// killed-uses, add defs. This is the not recommended way, because it depends
 /// on accurate kill flags. If possible use stepBackward() instead of this
@@ -116,7 +126,7 @@ void LivePhysRegs::print(raw_ostream &OS) const {
   }
 
   for (const_iterator I = begin(), E = end(); I != E; ++I)
-    OS << " " << PrintReg(*I, TRI);
+    OS << " " << printReg(*I, TRI);
   OS << "\n";
 }
 
@@ -166,17 +176,32 @@ static void addCalleeSavedRegs(LivePhysRegs &LiveRegs,
     LiveRegs.addReg(*CSR);
 }
 
-/// Adds pristine registers to the given \p LiveRegs. Pristine registers are
-/// callee saved registers that are unused in the function.
-static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) {
+void LivePhysRegs::addPristines(const MachineFunction &MF) {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   if (!MFI.isCalleeSavedInfoValid())
     return;
+  /// This function will usually be called on an empty object, handle this
+  /// as a special case.
+  if (empty()) {
+    /// Add all callee saved regs, then remove the ones that are saved and
+    /// restored.
+    addCalleeSavedRegs(*this, MF);
+    /// Remove the ones that are not saved/restored; they are pristine.
+    for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+      removeReg(Info.getReg());
+    return;
+  }
+  /// If a callee-saved register that is not pristine is already present
+  /// in the set, we should make sure that it stays in it. Precompute the
+  /// set of pristine registers in a separate object.
   /// Add all callee saved regs, then remove the ones that are saved+restored.
-  addCalleeSavedRegs(LiveRegs, MF);
+  LivePhysRegs Pristine(*TRI);
+  addCalleeSavedRegs(Pristine, MF);
   /// Remove the ones that are not saved/restored; they are pristine.
   for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
-    LiveRegs.removeReg(Info.getReg());
+    Pristine.removeReg(Info.getReg());
+  for (MCPhysReg R : Pristine)
+    addReg(R);
 }
 
 void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
@@ -192,7 +217,8 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
     const MachineFrameInfo &MFI = MF.getFrameInfo();
     if (MFI.isCalleeSavedInfoValid()) {
       for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
-        addReg(Info.getReg());
+        if (Info.isRestored())
+          addReg(Info.getReg());
     }
   }
 }
@@ -200,7 +226,7 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
 void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
   if (!MBB.succ_empty()) {
-    addPristines(*this, MF);
+    addPristines(MF);
     addLiveOutsNoPristines(MBB);
   } else if (MBB.isReturnBlock()) {
     // For the return block: Add all callee saved registers.
@@ -212,21 +238,27 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
 
 void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
-  addPristines(*this, MF);
+  addPristines(MF);
   addBlockLiveIns(MBB);
 }
 
 void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
-                          const MachineRegisterInfo &MRI,
-                          MachineBasicBlock &MBB) {
+                          const MachineBasicBlock &MBB) {
+  const MachineFunction &MF = *MBB.getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-  assert(MBB.livein_empty());
   LiveRegs.init(TRI);
   LiveRegs.addLiveOutsNoPristines(MBB);
-  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+  for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
     LiveRegs.stepBackward(MI);
+}
 
-  for (unsigned Reg : LiveRegs) {
+void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) {
+  assert(MBB.livein_empty() && "Expected empty live-in list");
+  const MachineFunction &MF = *MBB.getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  for (MCPhysReg Reg : LiveRegs) {
     if (MRI.isReserved(Reg))
       continue;
     // Skip the register if we are about to add one of its super registers.
@@ -242,3 +274,56 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
     MBB.addLiveIn(Reg);
   }
 }
+
+void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
+  const MachineFunction &MF = *MBB.getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+
+  // We walk through the block backwards and start with the live outs.
+  LivePhysRegs LiveRegs;
+  LiveRegs.init(TRI);
+  LiveRegs.addLiveOutsNoPristines(MBB);
+
+  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+    // Recompute dead flags.
+    for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef() || MO->isDebug())
+        continue;
+
+      unsigned Reg = MO->getReg();
+      if (Reg == 0)
+        continue;
+      assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
+      bool IsNotLive = LiveRegs.available(MRI, Reg);
+      MO->setIsDead(IsNotLive);
+    }
+
+    // Step backward over defs.
+    LiveRegs.removeDefs(MI);
+
+    // Recompute kill flags.
+    for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->readsReg() || MO->isDebug())
+        continue;
+
+      unsigned Reg = MO->getReg();
+      if (Reg == 0)
+        continue;
+      assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
+      bool IsNotLive = LiveRegs.available(MRI, Reg);
+      MO->setIsKill(IsNotLive);
+    }
+
+    // Complete the stepbackward.
+    LiveRegs.addUses(MI);
+  }
+}
+
+void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs,
+                                MachineBasicBlock &MBB) {
+  computeLiveIns(LiveRegs, MBB);
+  addLiveIns(MBB, LiveRegs);
+}
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 8c43c9f3f884..66c23b7b69ce 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -1,4 +1,4 @@
-//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,9 +12,27 @@
 //===----------------------------------------------------------------------===//
 
 #include "LiveRangeCalc.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
 
 using namespace llvm;
 
@@ -44,7 +62,6 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
   LiveIn.clear();
 }
 
-
 static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
                           LiveRange &LR, const MachineOperand &MO) {
   const MachineInstr &MI = *MO.getParent();
@@ -136,7 +153,6 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
     createDeadDef(*Indexes, *Alloc, LR, MO);
 }
 
-
 void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
                                  LiveInterval *LI) {
   SmallVector<SlotIndex, 4> Undefs;
@@ -148,7 +164,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
   const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
   for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
     // Clear all kill flags. They will be reinserted after register allocation
-    // by LiveIntervalAnalysis::addKillFlags().
+    // by LiveIntervals::addKillFlags().
     if (MO.isUse())
       MO.setIsKill(false);
     // MO::readsReg returns "true" for subregister defs. This is for keeping
@@ -197,7 +213,6 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
   }
 }
 
-
 void LiveRangeCalc::updateFromLiveIns() {
   LiveRangeUpdater Updater;
   for (const LiveInBlock &I : LiveIn) {
@@ -248,7 +263,6 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
   calculateValues();
 }
 
-
 // This function is called by a client after using the low-level API to add
 // live-out and live-in blocks.  The unique value optimization is not
 // available, SplitEditor::transferValues handles that case directly anyway.
@@ -259,7 +273,6 @@ void LiveRangeCalc::calculateValues() {
   updateFromLiveIns();
 }
 
-
 bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
                                  MachineBasicBlock &MBB, BitVector &DefOnEntry,
                                  BitVector &UndefOnEntry) {
@@ -351,7 +364,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
 #ifndef NDEBUG
     if (MBB->pred_empty()) {
       MBB->getParent()->verify();
-      errs() << "Use of " << PrintReg(PhysReg)
+      errs() << "Use of " << printReg(PhysReg)
              << " does not have a corresponding definition on every path:\n";
       const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
       if (MI != nullptr)
@@ -363,8 +376,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
         !MBB->isLiveIn(PhysReg)) {
       MBB->getParent()->verify();
       const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
-      errs() << "The register " << PrintReg(PhysReg, TRI)
-             << " needs to be live in to BB#" << MBB->getNumber()
+      errs() << "The register " << printReg(PhysReg, TRI)
+             << " needs to be live in to " << printMBBReference(*MBB)
              << ", but is missing from the live-in list.\n";
       report_fatal_error("Invalid global physical register");
     }
@@ -410,7 +423,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
 
   LiveIn.clear();
   FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
-  if (Undefs.size() > 0 && FoundUndef)
+  if (!Undefs.empty() && FoundUndef)
     UniqueVNI = false;
 
   // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
@@ -454,7 +467,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
   LiveIn.reserve(WorkList.size());
   for (unsigned BN : WorkList) {
     MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
-    if (Undefs.size() > 0 &&
+    if (!Undefs.empty() &&
         !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
       continue;
     addLiveInBlock(LR, DomTree->getNode(MBB));
@@ -465,7 +478,6 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
   return false;
 }
 
-
 // This is essentially the same iterative algorithm that SSAUpdater uses,
 // except we already have a dominator tree, so we don't have to recompute it.
 void LiveRangeCalc::updateSSA() {
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index d41b782d9bdf..c4914f23f56d 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -1,4 +1,4 @@
-//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -26,28 +26,35 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include <utility>
 
 namespace llvm {
 
-/// Forward declarations for MachineDominators.h:
-class MachineDominatorTree;
 template <class NodeT> class DomTreeNodeBase;
-typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+class MachineDominatorTree;
+class MachineFunction;
+class MachineRegisterInfo;
+
+using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
 
 class LiveRangeCalc {
-  const MachineFunction *MF;
-  const MachineRegisterInfo *MRI;
-  SlotIndexes *Indexes;
-  MachineDominatorTree *DomTree;
-  VNInfo::Allocator *Alloc;
+  const MachineFunction *MF = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
+  SlotIndexes *Indexes = nullptr;
+  MachineDominatorTree *DomTree = nullptr;
+  VNInfo::Allocator *Alloc = nullptr;
 
   /// LiveOutPair - A value and the block that defined it.  The domtree node is
   /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
-  typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+  using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>;
 
   /// LiveOutMap - Map basic blocks to the value leaving the block.
-  typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+  using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>;
 
   /// Bit vector of active entries in LiveOut, also used as a visited set by
   /// findReachingDefs.  One entry per basic block, indexed by block number.
@@ -66,7 +73,7 @@ class LiveRangeCalc {
   /// registers do not overlap), but the defined/undefined information must
   /// be kept separate for each individual range.
   /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
-  typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+  using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>;
   EntryInfoMap EntryInfos;
 
   /// Map each basic block where a live range is live out to the live-out value
@@ -105,10 +112,10 @@ class LiveRangeCalc {
     SlotIndex Kill;
 
     // Live-in value filled in by updateSSA once it is known.
-    VNInfo *Value;
+    VNInfo *Value = nullptr;
 
     LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
-      : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {}
+      : LR(LR), DomNode(node), Kill(kill) {}
   };
 
   /// LiveIn - Work list of blocks where the live-in value has yet to be
@@ -171,8 +178,7 @@ class LiveRangeCalc {
   void resetLiveOutMap();
 
 public:
-  LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr),
-                    DomTree(nullptr), Alloc(nullptr) {}
+  LiveRangeCalc() = default;
 
   //===--------------------------------------------------------------------===//
   // High-level interface.
@@ -186,10 +192,8 @@ public:
   /// that may overlap a previously computed live range, and before the first
   /// live range in a function.  If live ranges are not known to be
   /// non-overlapping, call reset before each.
-  void reset(const MachineFunction *MF,
-             SlotIndexes*,
-             MachineDominatorTree*,
-             VNInfo::Allocator*);
+  void reset(const MachineFunction *mf, SlotIndexes *SI,
+             MachineDominatorTree *MDT, VNInfo::Allocator *VNIA);
 
   //===--------------------------------------------------------------------===//
   // Mid-level interface.
@@ -282,4 +286,4 @@ public:
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 92cca1a54951..86cfbd87f5b1 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -14,12 +14,12 @@
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
@@ -465,7 +465,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
     if (MRI.recomputeRegClass(LI.reg))
       DEBUG({
         const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-        dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+        dbgs() << "Inflated " << printReg(LI.reg) << " to "
                << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
       });
     VRAI.calculateSpillWeightAndHint(LI);
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index 552f4b5393fe..02e1f3b01ade 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -1,4 +1,4 @@
-//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
+//===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,20 +14,32 @@
 /// uses, all of which are the only use of the def.
 ///
 ///===---------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
 
 #define DEBUG_TYPE "lrshrink"
 
 STATISTIC(NumInstrsHoistedToShrinkLiveRange,
           "Number of insructions hoisted to shrink live range.");
 
-using namespace llvm;
-
 namespace {
+
 class LiveRangeShrink : public MachineFunctionPass {
 public:
   static char ID;
@@ -45,23 +57,26 @@ public:
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
-} // End anonymous namespace.
+
+} // end anonymous namespace
 
 char LiveRangeShrink::ID = 0;
+
 char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
 
 INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
                 false)
-namespace {
-typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
+
+using InstOrderMap = DenseMap<MachineInstr *, unsigned>;
 
 /// Returns \p New if it's dominated by \p Old, otherwise return \p Old.
 /// \p M maintains a map from instruction to its dominating order that satisfies
 /// M[A] > M[B] guarantees that A is dominated by B.
 /// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return
 /// \p New.
-MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
-                                       const InstOrderMap &M) {
+static MachineInstr *FindDominatedInstruction(MachineInstr &New,
+                                              MachineInstr *Old,
+                                              const InstOrderMap &M) {
   auto NewIter = M.find(&New);
   if (NewIter == M.end())
     return Old;
@@ -82,16 +97,16 @@ MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
 
 /// Builds Instruction to its dominating order number map \p M by traversing
 /// from instruction \p Start.
-void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
+static void BuildInstOrderMap(MachineBasicBlock::iterator Start,
+                              InstOrderMap &M) {
   M.clear();
   unsigned i = 0;
   for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
     M[&I] = i++;
 }
-} // end anonymous namespace
 
 bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 60033db38ee4..bd435968296d 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -15,17 +15,17 @@
 #include "RegisterCoalescer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 
 using namespace llvm;
@@ -102,14 +102,14 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
 }
 
 void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
-  DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
-               << " to " << PrintReg(PhysReg, TRI) << ':');
+  DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI)
+               << " to " << printReg(PhysReg, TRI) << ':');
   assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
   VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
 
   foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
                                          const LiveRange &Range) {
-    DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range);
+    DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range);
     Matrix[Unit].unify(VirtReg, Range);
     return false;
   });
@@ -120,13 +120,13 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
 
 void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
   unsigned PhysReg = VRM->getPhys(VirtReg.reg);
-  DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
-               << " from " << PrintReg(PhysReg, TRI) << ':');
+  DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI)
+               << " from " << printReg(PhysReg, TRI) << ':');
   VRM->clearVirt(VirtReg.reg);
 
   foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
                                          const LiveRange &Range) {
-    DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI));
+    DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI));
     Matrix[Unit].extract(VirtReg, Range);
     return false;
   });
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index f9ba4ffa6527..9f28db6287ba 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -19,8 +19,8 @@
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
@@ -97,23 +97,37 @@ static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
     LiveUnits.addReg(*CSR);
 }
 
-/// Adds pristine registers to the given \p LiveUnits. Pristine registers are
-/// callee saved registers that are unused in the function.
-static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) {
+void LiveRegUnits::addPristines(const MachineFunction &MF) {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   if (!MFI.isCalleeSavedInfoValid())
     return;
+  /// This function will usually be called on an empty object, handle this
+  /// as a special case.
+  if (empty()) {
+    /// Add all callee saved regs, then remove the ones that are saved and
+    /// restored.
+    addCalleeSavedRegs(*this, MF);
+    /// Remove the ones that are not saved/restored; they are pristine.
+    for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+      removeReg(Info.getReg());
+    return;
+  }
+  /// If a callee-saved register that is not pristine is already present
+  /// in the set, we should make sure that it stays in it. Precompute the
+  /// set of pristine registers in a separate object.
   /// Add all callee saved regs, then remove the ones that are saved+restored.
-  addCalleeSavedRegs(LiveUnits, MF);
+  LiveRegUnits Pristine(*TRI);
+  addCalleeSavedRegs(Pristine, MF);
   /// Remove the ones that are not saved/restored; they are pristine.
   for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
-    LiveUnits.removeReg(Info.getReg());
+    Pristine.removeReg(Info.getReg());
+  addUnits(Pristine.getBitVector());
 }
 
 void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
   if (!MBB.succ_empty()) {
-    addPristines(*this, MF);
+    addPristines(MF);
     // To get the live-outs we simply merge the live-ins of all successors.
     for (const MachineBasicBlock *Succ : MBB.successors())
       addBlockLiveIns(*this, *Succ);
@@ -127,6 +141,6 @@ void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
 
 void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
-  addPristines(*this, MF);
+  addPristines(MF);
   addBlockLiveIns(*this, MBB);
 }
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index b51f8b0aa6bb..b0e58b0e3e5f 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -14,12 +14,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "livestacks"
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index a9aec926115a..032dd66ae1d2 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -37,7 +37,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -235,7 +234,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
     // Otherwise, the last sub-register def implicitly defines this register.
     // e.g.
     // AH =
-    // AL = ... <imp-def EAX>, <imp-kill AH>
+    // AL = ... implicit-def EAX, implicit killed AH
     //    = AH
     // ...
     //    = EAX
@@ -321,17 +320,17 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
   // AH =
   //
   //    = AX
-  //    = AL, AX<imp-use, kill>
+  //    = AL, implicit killed AX
   // AX =
   //
   // Or whole register is defined, but not used at all.
-  // AX<dead> =
+  // dead AX =
   // ...
   // AX =
   //
   // Or whole register is defined, but only partly used.
-  // AX<dead> = AL<imp-def>
-  //    = AL<kill>
+  // dead AX = implicit-def AL
+  //    = killed AL
   // AX =
   MachineInstr *LastPartDef = nullptr;
   unsigned LastPartDefDist = 0;
@@ -364,7 +363,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
   if (!PhysRegUse[Reg]) {
     // Partial uses. Mark register def dead and add implicit def of
     // sub-registers which are used.
-    // EAX<dead>  = op  AL<imp-def>
+    // dead EAX  = op  implicit-def AL
     // That is, EAX def is dead but AL def extends pass it.
     PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
     for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index b109f1922a3e..c0da37ede849 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -14,29 +14,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
 
 using namespace llvm;
 
@@ -47,6 +48,7 @@ STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
 STATISTIC(NumReplacements, "Number of frame indices references replaced");
 
 namespace {
+
   class FrameRef {
     MachineBasicBlock::iterator MI; // Instr referencing the frame
     int64_t LocalOffset;            // Local offset of the frame idx referenced
@@ -72,9 +74,10 @@ namespace {
   };
 
   class LocalStackSlotPass: public MachineFunctionPass {
-    SmallVector<int64_t,16> LocalOffsets;
+    SmallVector<int64_t, 16> LocalOffsets;
+
     /// StackObjSet - A set of stack object indexes
-    typedef SmallSetVector<int, 8> StackObjSet;
+    using StackObjSet = SmallSetVector<int, 8>;
 
     void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset,
                            bool StackGrowsDown, unsigned &MaxAlign);
@@ -84,11 +87,14 @@ namespace {
                                int64_t &Offset, unsigned &MaxAlign);
     void calculateFrameObjectOffsets(MachineFunction &Fn);
     bool insertFrameReferenceRegisters(MachineFunction &Fn);
+
   public:
     static char ID; // Pass identification, replacement for typeid
+
     explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
       initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
     }
+
     bool runOnMachineFunction(MachineFunction &MF) override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -96,20 +102,20 @@ namespace {
       AU.addRequired<StackProtector>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
-
-  private:
   };
+
 } // end anonymous namespace
 
 char LocalStackSlotPass::ID = 0;
+
 char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+
 INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE,
                       "Local Stack Slot Allocation", false, false)
 INITIALIZE_PASS_DEPENDENCY(StackProtector)
 INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE,
                     "Local Stack Slot Allocation", false, false)
 
-
 bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -178,7 +184,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
                                            MachineFrameInfo &MFI,
                                            bool StackGrowsDown, int64_t &Offset,
                                            unsigned &MaxAlign) {
-
   for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
         E = UnassignedObjs.end(); I != E; ++I) {
     int i = *I;
@@ -189,7 +194,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
 
 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
 /// abstract stack objects.
-///
 void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Loop over all of the stack objects, assigning sequential addresses...
   MachineFrameInfo &MFI = Fn.getFrameInfo();
@@ -397,7 +401,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
         continue;
       }
 
-      const MachineFunction *MF = MI.getParent()->getParent();
+      const MachineFunction *MF = MI.getMF();
       const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
       BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
 
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 0fc48d4e0b6b..0cf578b50563 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -16,11 +16,11 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetLowering.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
new file mode 100644
index 000000000000..4b676a60a8cd
--- /dev/null
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -0,0 +1,625 @@
+//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to employ a canonical code transformation so
+// that code compiled with slightly different IR passes can be diffed more
+// effectively than otherwise. This is done by renaming vregs in a given
+// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
+// move defs closer to their use inorder to reduce diffs caused by slightly
+// different schedules.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-canonicalizer example.mir
+//
+// Reorders instructions canonically.
+// Renames virtual register operands canonically.
+// Strips certain MIR artifacts (optionally).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+using namespace llvm;
+
+namespace llvm {
+extern char &MIRCanonicalizerID;
+} // namespace llvm
+
+#define DEBUG_TYPE "mir-canonicalizer"
+
+static cl::opt<unsigned>
+CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
+                           cl::value_desc("N"),
+                           cl::desc("Function number to canonicalize."));
+
+static cl::opt<unsigned>
+CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u),
+                             cl::value_desc("N"),
+                             cl::desc("BasicBlock number to canonicalize."));
+
+namespace {
+
+class MIRCanonicalizer : public MachineFunctionPass {
+public:
+  static char ID;
+  MIRCanonicalizer() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "Rename register operands in a canonical ordering.";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
+class TypedVReg {
+  VRType type;
+  unsigned reg;
+
+public:
+  TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {}
+  TypedVReg(VRType type) : type(type), reg(~0U) {
+    assert(type != RSE_Reg && "Expected a non-register type.");
+  }
+
+  bool isReg()        const { return type == RSE_Reg;          }
+  bool isFrameIndex() const { return type == RSE_FrameIndex;   }
+  bool isCandidate()  const { return type == RSE_NewCandidate; }
+
+  VRType getType() const { return type; }
+  unsigned getReg() const {
+    assert(this->isReg() && "Expected a virtual or physical register.");
+    return reg;
+  }
+};
+
+char MIRCanonicalizer::ID;
+
+char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
+
+INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
+                      "Rename Register Operands Canonically", false, false)
+
+INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
+                    "Rename Register Operands Canonically", false, false)
+
+static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  std::vector<MachineBasicBlock *> RPOList;
+  for (auto MBB : RPOT) {
+    RPOList.push_back(MBB);
+  }
+
+  return RPOList;
+}
+
+// Set a dummy vreg. We use this vregs register class to generate throw-away
+// vregs that are used to skip vreg numbers so that vreg numbers line up.
+static unsigned GetDummyVReg(const MachineFunction &MF) {
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      for (auto &MO : MI.operands()) {
+        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+          continue;
+        return MO.getReg();
+      }
+    }
+  }
+
+  return ~0U;
+}
+
+static bool rescheduleCanonically(MachineBasicBlock *MBB) {
+
+  bool Changed = false;
+
+  // Calculates the distance of MI from the begining of its parent BB.
+  auto getInstrIdx = [](const MachineInstr &MI) {
+    unsigned i = 0;
+    for (auto &CurMI : *MI.getParent()) {
+      if (&CurMI == &MI)
+        return i;
+      i++;
+    }
+    return ~0U;
+  };
+
+  // Pre-Populate vector of instructions to reschedule so that we don't
+  // clobber the iterator.
+  std::vector<MachineInstr *> Instructions;
+  for (auto &MI : *MBB) {
+    Instructions.push_back(&MI);
+  }
+
+  for (auto *II : Instructions) {
+    if (II->getNumOperands() == 0)
+      continue;
+
+    MachineOperand &MO = II->getOperand(0);
+    if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      continue;
+
+    DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
+
+    MachineInstr *Def = II;
+    unsigned Distance = ~0U;
+    MachineInstr *UseToBringDefCloserTo = nullptr;
+    MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+    for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
+      MachineInstr *UseInst = UO.getParent();
+
+      const unsigned DefLoc = getInstrIdx(*Def);
+      const unsigned UseLoc = getInstrIdx(*UseInst);
+      const unsigned Delta = (UseLoc - DefLoc);
+
+      if (UseInst->getParent() != Def->getParent())
+        continue;
+      if (DefLoc >= UseLoc)
+        continue;
+
+      if (Delta < Distance) {
+        Distance = Delta;
+        UseToBringDefCloserTo = UseInst;
+      }
+    }
+
+    const auto BBE = MBB->instr_end();
+    MachineBasicBlock::iterator DefI = BBE;
+    MachineBasicBlock::iterator UseI = BBE;
+
+    for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
+
+      if (DefI != BBE && UseI != BBE)
+        break;
+
+      if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
+        continue;
+
+      if (&*BBI == Def) {
+        DefI = BBI;
+        continue;
+      }
+
+      if (&*BBI == UseToBringDefCloserTo) {
+        UseI = BBI;
+        continue;
+      }
+    }
+
+    if (DefI == BBE || UseI == BBE)
+      continue;
+
+    DEBUG({
+      dbgs() << "Splicing ";
+      DefI->dump();
+      dbgs() << " right before: ";
+      UseI->dump();
+    });
+
+    Changed = true;
+    MBB->splice(UseI, MBB, DefI);
+  }
+
+  return Changed;
+}
+
+/// Here we find our candidates. What makes an interesting candidate?
+/// An candidate for a canonicalization tree root is normally any kind of
+/// instruction that causes side effects such as a store to memory or a copy to
+/// a physical register or a return instruction. We use these as an expression
+/// tree root that we walk inorder to build a canonical walk which should result
+/// in canoncal vreg renaming.
+static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
+  std::vector<MachineInstr *> Candidates;
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+    MachineInstr *MI = &*II;
+
+    bool DoesMISideEffect = false;
+
+    if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
+      const unsigned Dst = MI->getOperand(0).getReg();
+      DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
+
+      for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
+        if (DoesMISideEffect) break;
+        DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
+      }
+    }
+
+    if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
+      continue;
+
+    DEBUG(dbgs() << "Found Candidate:  "; MI->dump(););
+    Candidates.push_back(MI);
+  }
+
+  return Candidates;
+}
+
+static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
+                            std::queue<TypedVReg> &RegQueue,
+                            std::vector<MachineInstr *> &VisitedMIs,
+                            const MachineBasicBlock *MBB) {
+
+  const MachineFunction &MF = *MBB->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  while (!RegQueue.empty()) {
+
+    auto TReg = RegQueue.front();
+    RegQueue.pop();
+
+    if (TReg.isFrameIndex()) {
+      DEBUG(dbgs() << "Popping frame index.\n";);
+      VRegs.push_back(TypedVReg(RSE_FrameIndex));
+      continue;
+    }
+
+    assert(TReg.isReg() && "Expected vreg or physreg.");
+    unsigned Reg = TReg.getReg();
+
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      DEBUG({
+        dbgs() << "Popping vreg ";
+        MRI.def_begin(Reg)->dump();
+        dbgs() << "\n";
+      });
+
+      if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
+            return TR.isReg() && TR.getReg() == Reg;
+          })) {
+        VRegs.push_back(TypedVReg(Reg));
+      }
+    } else {
+      DEBUG(dbgs() << "Popping physreg.\n";);
+      VRegs.push_back(TypedVReg(Reg));
+      continue;
+    }
+
+    for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
+      MachineInstr *Def = RI->getParent();
+
+      if (Def->getParent() != MBB)
+        continue;
+
+      if (llvm::any_of(VisitedMIs,
+                       [&](const MachineInstr *VMI) { return Def == VMI; })) {
+        break;
+      }
+
+      DEBUG({
+        dbgs() << "\n========================\n";
+        dbgs() << "Visited MI: ";
+        Def->dump();
+        dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
+        dbgs() << "\n========================\n";
+      });
+      VisitedMIs.push_back(Def);
+      for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+
+        MachineOperand &MO = Def->getOperand(I);
+        if (MO.isFI()) {
+          DEBUG(dbgs() << "Pushing frame index.\n";);
+          RegQueue.push(TypedVReg(RSE_FrameIndex));
+        }
+
+        if (!MO.isReg())
+          continue;
+        RegQueue.push(TypedVReg(MO.getReg()));
+      }
+    }
+  }
+}
+
+// TODO: Work to remove this in the future. One day when we have named vregs
+// we should be able to form the canonical name based on some characteristic
+// we see in that point of the expression tree (like if we were to name based
+// on some sort of value numbering scheme).
+static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI,
+                      const TargetRegisterClass *RC) {
+  const unsigned VR_GAP = (++VRegGapIndex * 1000);
+
+  DEBUG({
+    dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to "
+           << VR_GAP << "\n";
+  });
+
+  unsigned I = MRI.createVirtualRegister(RC);
+  const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
+  while (I != E) {
+    I = MRI.createVirtualRegister(RC);
+  }
+}
+
+static std::map<unsigned, unsigned>
+GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
+                 const std::vector<unsigned> &renamedInOtherBB,
+                 MachineRegisterInfo &MRI,
+                 const TargetRegisterClass *RC) {
+  std::map<unsigned, unsigned> VRegRenameMap;
+  unsigned LastRenameReg = MRI.createVirtualRegister(RC);
+  bool FirstCandidate = true;
+
+  for (auto &vreg : VRegs) {
+    if (vreg.isFrameIndex()) {
+      // We skip one vreg for any frame index because there is a good chance
+      // (especially when comparing SelectionDAG to GlobalISel generated MIR)
+      // that in the other file we are just getting an incoming vreg that comes
+      // from a copy from a frame index. So it's safe to skip by one.
+      LastRenameReg = MRI.createVirtualRegister(RC);
+      DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
+      continue;
+    } else if (vreg.isCandidate()) {
+
+      // After the first candidate, for every subsequent candidate, we skip mod
+      // 10 registers so that the candidates are more likely to start at the
+      // same vreg number making it more likely that the canonical walk from the
+      // candidate insruction. We don't need to skip from the first candidate of
+      // the BasicBlock because we already skip ahead several vregs for each BB.
+      while (LastRenameReg % 10) {
+        if (!FirstCandidate) break;
+        LastRenameReg = MRI.createVirtualRegister(RC);
+
+        DEBUG({
+          dbgs() << "Skipping rename for new candidate " << LastRenameReg
+                 << "\n";
+        });
+      }
+      FirstCandidate = false;
+      continue;
+    } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
+      LastRenameReg = MRI.createVirtualRegister(RC);
+      DEBUG({
+        dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
+      });
+      continue;
+    }
+
+    auto Reg = vreg.getReg();
+    if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
+      DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";);
+      continue;
+    }
+
+    auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+    LastRenameReg = Rename;
+
+    if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
+      DEBUG(dbgs() << "Mapping vreg ";);
+      if (MRI.reg_begin(Reg) != MRI.reg_end()) {
+        DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
+      } else {
+        DEBUG(dbgs() << Reg;);
+      }
+      DEBUG(dbgs() << " to ";);
+      if (MRI.reg_begin(Rename) != MRI.reg_end()) {
+        DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
+      } else {
+        DEBUG(dbgs() << Rename;);
+      }
+      DEBUG(dbgs() << "\n";);
+
+      VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
+    }
+  }
+
+  return VRegRenameMap;
+}
+
+static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB,
+                           const std::map<unsigned, unsigned> &VRegRenameMap,
+                           MachineRegisterInfo &MRI) {
+  bool Changed = false;
+  for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
+
+    auto VReg = I->first;
+    auto Rename = I->second;
+
+    RenamedInOtherBB.push_back(Rename);
+
+    std::vector<MachineOperand *> RenameMOs;
+    for (auto &MO : MRI.reg_operands(VReg)) {
+      RenameMOs.push_back(&MO);
+    }
+
+    for (auto *MO : RenameMOs) {
+      Changed = true;
+      MO->setReg(Rename);
+
+      if (!MO->isDef())
+        MO->setIsKill(false);
+    }
+  }
+
+  return Changed;
+}
+
+static bool doDefKillClear(MachineBasicBlock *MBB) {
+  bool Changed = false;
+
+  for (auto &MI : *MBB) {
+    for (auto &MO : MI.operands()) {
+      if (!MO.isReg())
+        continue;
+      if (!MO.isDef() && MO.isKill()) {
+        Changed = true;
+        MO.setIsKill(false);
+      }
+
+      if (MO.isDef() && MO.isDead()) {
+        Changed = true;
+        MO.setIsDead(false);
+      }
+    }
+  }
+
+  return Changed;
+}
+
+static bool runOnBasicBlock(MachineBasicBlock *MBB,
+                            std::vector<StringRef> &bbNames,
+                            std::vector<unsigned> &renamedInOtherBB,
+                            unsigned &basicBlockNum, unsigned &VRegGapIndex) {
+
+  if (CanonicalizeBasicBlockNumber != ~0U) {
+    if (CanonicalizeBasicBlockNumber != basicBlockNum++)
+      return false;
+    DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";);
+  }
+
+  if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
+    DEBUG({
+      dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
+             << "\n";
+    });
+    return false;
+  }
+
+  DEBUG({
+    dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
+    dbgs() << "\n\n================================================\n\n";
+  });
+
+  bool Changed = false;
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  const unsigned DummyVReg = GetDummyVReg(MF);
+  const TargetRegisterClass *DummyRC =
+    (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg);
+  if (!DummyRC) return false;
+
+  bbNames.push_back(MBB->getName());
+  DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
+
+  DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
+  Changed |= rescheduleCanonically(MBB);
+  DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
+
+  std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
+  std::vector<MachineInstr *> VisitedMIs;
+  std::copy(Candidates.begin(), Candidates.end(),
+            std::back_inserter(VisitedMIs));
+
+  std::vector<TypedVReg> VRegs;
+  for (auto candidate : Candidates) {
+    VRegs.push_back(TypedVReg(RSE_NewCandidate));
+
+    std::queue<TypedVReg> RegQueue;
+
+    // Here we walk the vreg operands of a non-root node along our walk.
+    // The root nodes are the original candidates (stores normally).
+    // These are normally not the root nodes (except for the case of copies to
+    // physical registers).
+    for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
+      if (candidate->mayStore() || candidate->isBranch())
+        break;
+
+      MachineOperand &MO = candidate->getOperand(i);
+      if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+        continue;
+
+      DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
+      RegQueue.push(TypedVReg(MO.getReg()));
+    }
+
+    // Here we walk the root candidates. We start from the 0th operand because
+    // the root is normally a store to a vreg.
+    for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
+
+      if (!candidate->mayStore() && !candidate->isBranch())
+        break;
+
+      MachineOperand &MO = candidate->getOperand(i);
+
+      // TODO: Do we want to only add vregs here?
+      if (!MO.isReg() && !MO.isFI())
+        continue;
+
+      DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
+
+      RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) :
+                                  TypedVReg(RSE_FrameIndex));
+    }
+
+    doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
+  }
+
+  // If we have populated no vregs to rename then bail.
+  // The rest of this function does the vreg remaping.
+  if (VRegs.size() == 0)
+    return Changed;
+
+  // Skip some vregs, so we can recon where we'll land next.
+  SkipVRegs(VRegGapIndex, MRI, DummyRC);
+
+  auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
+  Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+  Changed |= doDefKillClear(MBB);
+
+  DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
+  DEBUG(dbgs() << "\n\n================================================\n\n");
+  return Changed;
+}
+
+bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
+
+  static unsigned functionNum = 0;
+  if (CanonicalizeFunctionNumber != ~0U) {
+    if (CanonicalizeFunctionNumber != functionNum++)
+      return false;
+    DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";);
+  }
+
+  // we need a valid vreg to create a vreg type for skipping all those
+  // stray vreg numbers so reach alignment/canonical vreg values.
+  std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF);
+
+  DEBUG(
+    dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
+    dbgs() << "\n\n================================================\n\n";
+    dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
+    for (auto MBB : RPOList) {
+      dbgs() << MBB->getName() << "\n";
+    }
+    dbgs() << "\n\n================================================\n\n";
+  );
+
+  std::vector<StringRef> BBNames;
+  std::vector<unsigned> RenamedInOtherBB;
+
+  unsigned GapIdx = 0;
+  unsigned BBNum = 0;
+
+  bool Changed = false;
+
+  for (auto MBB : RPOList)
+    Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx);
+
+  return Changed;
+}
+
diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt
index 2852124786e3..6269b005a985 100644
--- a/lib/CodeGen/MIRParser/LLVMBuild.txt
+++ b/lib/CodeGen/MIRParser/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = MIRParser
 parent = CodeGen
-required_libraries = AsmParser CodeGen Core MC Support Target
+required_libraries = AsmParser BinaryFormat CodeGen Core MC Support Target
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 58a655a4dee4..6adb7f1288d7 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -1,4 +1,4 @@
-//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
+//===- MILexer.cpp - Machine instructions lexer implementation ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,27 +12,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "MILexer.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include <algorithm>
+#include <cassert>
 #include <cctype>
+#include <string>
 
 using namespace llvm;
 
 namespace {
 
-typedef function_ref<void(StringRef::iterator Loc, const Twine &)>
-    ErrorCallbackType;
+using ErrorCallbackType =
+    function_ref<void(StringRef::iterator Loc, const Twine &)>;
 
 /// This class provides a way to iterate and get characters from the source
 /// string.
 class Cursor {
-  const char *Ptr;
-  const char *End;
+  const char *Ptr = nullptr;
+  const char *End = nullptr;
 
 public:
-  Cursor(NoneType) : Ptr(nullptr), End(nullptr) {}
+  Cursor(NoneType) {}
 
   explicit Cursor(StringRef Str) {
     Ptr = Str.data();
@@ -202,14 +208,24 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("internal", MIToken::kw_internal)
       .Case("early-clobber", MIToken::kw_early_clobber)
       .Case("debug-use", MIToken::kw_debug_use)
+      .Case("renamable", MIToken::kw_renamable)
       .Case("tied-def", MIToken::kw_tied_def)
       .Case("frame-setup", MIToken::kw_frame_setup)
       .Case("debug-location", MIToken::kw_debug_location)
       .Case("same_value", MIToken::kw_cfi_same_value)
       .Case("offset", MIToken::kw_cfi_offset)
+      .Case("rel_offset", MIToken::kw_cfi_rel_offset)
       .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
       .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+      .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
+      .Case("escape", MIToken::kw_cfi_escape)
       .Case("def_cfa", MIToken::kw_cfi_def_cfa)
+      .Case("remember_state", MIToken::kw_cfi_remember_state)
+      .Case("restore", MIToken::kw_cfi_restore)
+      .Case("restore_state", MIToken::kw_cfi_restore_state)
+      .Case("undefined", MIToken::kw_cfi_undefined)
+      .Case("register", MIToken::kw_cfi_register)
+      .Case("window_save", MIToken::kw_cfi_window_save)
       .Case("blockaddress", MIToken::kw_blockaddress)
       .Case("intrinsic", MIToken::kw_intrinsic)
       .Case("target-index", MIToken::kw_target_index)
@@ -270,6 +286,9 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
     C.advance();
   StringRef Number = NumberRange.upto(C);
   unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
+  // TODO: The format bb.<id>.<irname> is supported only when it's not a
+  // reference. Once we deprecate the format where the irname shows up, we
+  // should only lex forward if it is a reference.
   if (C.peek() == '.') {
     C.advance(); // Skip '.'
     ++StringOffset;
@@ -490,6 +509,7 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
       .Case("!alias.scope", MIToken::md_alias_scope)
       .Case("!noalias", MIToken::md_noalias)
       .Case("!range", MIToken::md_range)
+      .Case("!DIExpression", MIToken::md_diexpr)
       .Default(MIToken::Error);
 }
 
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index 08b82e59c4fc..0204d549d5d4 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -1,4 +1,4 @@
-//===- MILexer.h - Lexer for machine instructions -------------------------===//
+//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,7 +18,7 @@
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include <functional>
+#include <string>
 
 namespace llvm {
 
@@ -60,14 +60,24 @@ struct MIToken {
     kw_internal,
     kw_early_clobber,
     kw_debug_use,
+    kw_renamable,
     kw_tied_def,
     kw_frame_setup,
     kw_debug_location,
     kw_cfi_same_value,
     kw_cfi_offset,
+    kw_cfi_rel_offset,
     kw_cfi_def_cfa_register,
     kw_cfi_def_cfa_offset,
+    kw_cfi_adjust_cfa_offset,
+    kw_cfi_escape,
     kw_cfi_def_cfa,
+    kw_cfi_register,
+    kw_cfi_remember_state,
+    kw_cfi_restore,
+    kw_cfi_restore_state,
+    kw_cfi_undefined,
+    kw_cfi_window_save,
     kw_blockaddress,
     kw_intrinsic,
     kw_target_index,
@@ -100,6 +110,7 @@ struct MIToken {
     md_alias_scope,
     md_noalias,
     md_range,
+    md_diexpr,
 
     // Identifier tokens
     Identifier,
@@ -132,14 +143,14 @@ struct MIToken {
   };
 
 private:
-  TokenKind Kind;
+  TokenKind Kind = Error;
   StringRef Range;
   StringRef StringValue;
   std::string StringValueStorage;
   APSInt IntVal;
 
 public:
-  MIToken() : Kind(Error) {}
+  MIToken() = default;
 
   MIToken &reset(TokenKind Kind, StringRef Range);
 
@@ -164,7 +175,8 @@ public:
     return Kind == kw_implicit || Kind == kw_implicit_define ||
            Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
            Kind == kw_undef || Kind == kw_internal ||
-           Kind == kw_early_clobber || Kind == kw_debug_use;
+           Kind == kw_early_clobber || Kind == kw_debug_use ||
+           Kind == kw_renamable;
   }
 
   bool isMemoryOperandFlag() const {
@@ -203,4 +215,4 @@ StringRef lexMIToken(
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c68d87b15a31..1a78ae3aad07 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MILexer.h"
 #include "MIParser.h"
+#include "MILexer.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -21,8 +21,8 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
@@ -33,12 +33,15 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -63,11 +66,8 @@
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cctype>
@@ -209,9 +209,11 @@ public:
   bool parseJumpTableIndexOperand(MachineOperand &Dest);
   bool parseExternalSymbolOperand(MachineOperand &Dest);
   bool parseMDNode(MDNode *&Node);
+  bool parseDIExpression(MDNode *&Node);
   bool parseMetadataOperand(MachineOperand &Dest);
   bool parseCFIOffset(int &Offset);
   bool parseCFIRegister(unsigned &Reg);
+  bool parseCFIEscapeValues(std::string& Values);
   bool parseCFIOperand(MachineOperand &Dest);
   bool parseIRBlock(BasicBlock *&BB, const Function &F);
   bool parseBlockAddressOperand(MachineOperand &Dest);
@@ -429,7 +431,7 @@ bool MIParser::parseBasicBlockDefinition(
         break;
       case MIToken::IRBlock:
         // TODO: Report an error when both name and ir block are specified.
-        if (parseIRBlock(BB, *MF.getFunction()))
+        if (parseIRBlock(BB, MF.getFunction()))
           return true;
         lex();
         break;
@@ -445,7 +447,7 @@ bool MIParser::parseBasicBlockDefinition(
 
   if (!Name.empty()) {
     BB = dyn_cast_or_null<BasicBlock>(
-        MF.getFunction()->getValueSymbolTable()->lookup(Name));
+        MF.getFunction().getValueSymbolTable()->lookup(Name));
     if (!BB)
       return error(Loc, Twine("basic block '") + Name +
                             "' is not defined in the function '" +
@@ -854,10 +856,14 @@ bool MIParser::parseStandaloneStackObject(int &FI) {
 
 bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
   lex();
-  if (Token.isNot(MIToken::exclaim))
+  if (Token.is(MIToken::exclaim)) {
+    if (parseMDNode(Node))
+      return true;
+  } else if (Token.is(MIToken::md_diexpr)) {
+    if (parseDIExpression(Node))
+      return true;
+  } else
     return error("expected a metadata node");
-  if (parseMDNode(Node))
-    return true;
   if (Token.isNot(MIToken::Eof))
     return error("expected end of string after the metadata node");
   return false;
@@ -1054,6 +1060,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
   case MIToken::kw_debug_use:
     Flags |= RegState::Debug;
     break;
+  case MIToken::kw_renamable:
+    Flags |= RegState::Renamable;
+    break;
   default:
     llvm_unreachable("The current token should be a register flag");
   }
@@ -1206,7 +1215,8 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
       Reg, Flags & RegState::Define, Flags & RegState::Implicit,
       Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
       Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
-      Flags & RegState::InternalRead);
+      Flags & RegState::InternalRead, Flags & RegState::Renamable);
+
   return false;
 }
 
@@ -1224,7 +1234,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
                                const Constant *&C) {
   auto Source = StringValue.str(); // The source has to be null terminated.
   SMDiagnostic Err;
-  C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(),
+  C = parseConstantValue(Source, Err, *MF.getFunction().getParent(),
                          &PFS.IRSlots);
   if (!C)
     return error(Loc + Err.getColumnNo(), Err.getMessage());
@@ -1244,7 +1254,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
     lex();
     return false;
   } else if (Token.is(MIToken::PointerType)) {
-    const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
+    const DataLayout &DL = MF.getDataLayout();
     unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
     Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
     lex();
@@ -1338,6 +1348,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
     return error(Twine("use of undefined machine basic block #") +
                  Twine(Number));
   MBB = MBBInfo->second;
+  // TODO: Only parse the name if it's a MachineBasicBlockLabel. Deprecate once
+  // we drop the <irname> from the bb.<id>.<irname> format.
   if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName())
     return error(Twine("the name of machine basic block #") + Twine(Number) +
                  " isn't '" + Token.stringValue() + "'");
@@ -1407,7 +1419,7 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
 bool MIParser::parseGlobalValue(GlobalValue *&GV) {
   switch (Token.kind()) {
   case MIToken::NamedGlobalValue: {
-    const Module *M = MF.getFunction()->getParent();
+    const Module *M = MF.getFunction().getParent();
     GV = M->getNamedValue(Token.stringValue());
     if (!GV)
       return error(Twine("use of undefined global value '") + Token.range() +
@@ -1492,6 +1504,7 @@ bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
 
 bool MIParser::parseMDNode(MDNode *&Node) {
   assert(Token.is(MIToken::exclaim));
+
   auto Loc = Token.location();
   lex();
   if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
@@ -1507,10 +1520,56 @@ bool MIParser::parseMDNode(MDNode *&Node) {
   return false;
 }
 
+bool MIParser::parseDIExpression(MDNode *&Expr) {
+  assert(Token.is(MIToken::md_diexpr));
+  lex();
+
+  // FIXME: Share this parsing with the IL parser.
+  SmallVector<uint64_t, 8> Elements;
+
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+
+  if (Token.isNot(MIToken::rparen)) {
+    do {
+      if (Token.is(MIToken::Identifier)) {
+        if (unsigned Op = dwarf::getOperationEncoding(Token.stringValue())) {
+          lex();
+          Elements.push_back(Op);
+          continue;
+        }
+        return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
+      }
+
+      if (Token.isNot(MIToken::IntegerLiteral) ||
+          Token.integerValue().isSigned())
+        return error("expected unsigned integer");
+
+      auto &U = Token.integerValue();
+      if (U.ugt(UINT64_MAX))
+        return error("element too large, limit is " + Twine(UINT64_MAX));
+      Elements.push_back(U.getZExtValue());
+      lex();
+
+    } while (consumeIfPresent(MIToken::comma));
+  }
+
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+
+  Expr = DIExpression::get(MF.getFunction().getContext(), Elements);
+  return false;
+}
+
 bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
   MDNode *Node = nullptr;
-  if (parseMDNode(Node))
-    return true;
+  if (Token.is(MIToken::exclaim)) {
+    if (parseMDNode(Node))
+      return true;
+  } else if (Token.is(MIToken::md_diexpr)) {
+    if (parseDIExpression(Node))
+      return true;
+  }
   Dest = MachineOperand::CreateMetadata(Node);
   return false;
 }
@@ -1541,6 +1600,21 @@ bool MIParser::parseCFIRegister(unsigned &Reg) {
   return false;
 }
 
+bool MIParser::parseCFIEscapeValues(std::string &Values) {
+  do {
+    if (Token.isNot(MIToken::HexLiteral))
+      return error("expected a hexadecimal literal");
+    unsigned Value;
+    if (getUnsigned(Value))
+      return true;
+    if (Value > UINT8_MAX)
+      return error("expected a 8-bit integer (too large)");
+    Values.push_back(static_cast<uint8_t>(Value));
+    lex();
+  } while (consumeIfPresent(MIToken::comma));
+  return false;
+}
+
 bool MIParser::parseCFIOperand(MachineOperand &Dest) {
   auto Kind = Token.kind();
   lex();
@@ -1560,6 +1634,13 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
     CFIIndex =
         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
     break;
+  case MIToken::kw_cfi_rel_offset:
+    if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+        parseCFIOffset(Offset))
+      return true;
+    CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::createRelOffset(nullptr, Reg, Offset));
+    break;
   case MIToken::kw_cfi_def_cfa_register:
     if (parseCFIRegister(Reg))
       return true;
@@ -1573,6 +1654,12 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
     CFIIndex = MF.addFrameInst(
         MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
     break;
+  case MIToken::kw_cfi_adjust_cfa_offset:
+    if (parseCFIOffset(Offset))
+      return true;
+    CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, Offset));
+    break;
   case MIToken::kw_cfi_def_cfa:
     if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
         parseCFIOffset(Offset))
@@ -1581,6 +1668,42 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
     CFIIndex =
         MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
     break;
+  case MIToken::kw_cfi_remember_state:
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+    break;
+  case MIToken::kw_cfi_restore:
+    if (parseCFIRegister(Reg))
+      return true;
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg));
+    break;
+  case MIToken::kw_cfi_restore_state:
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+    break;
+  case MIToken::kw_cfi_undefined:
+    if (parseCFIRegister(Reg))
+      return true;
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg));
+    break;
+  case MIToken::kw_cfi_register: {
+    unsigned Reg2;
+    if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+        parseCFIRegister(Reg2))
+      return true;
+
+    CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createRegister(nullptr, Reg, Reg2));
+    break;
+  }
+  case MIToken::kw_cfi_window_save:
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
+    break;
+  case MIToken::kw_cfi_escape: {
+    std::string Values;
+    if (parseCFIEscapeValues(Values))
+      return true;
+    CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, Values));
+    break;
+  }
   default:
     // TODO: Parse the other CFI operands.
     llvm_unreachable("The current token should be a cfi operand");
@@ -1819,6 +1942,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
   case MIToken::kw_internal:
   case MIToken::kw_early_clobber:
   case MIToken::kw_debug_use:
+  case MIToken::kw_renamable:
   case MIToken::underscore:
   case MIToken::NamedRegister:
   case MIToken::VirtualRegister:
@@ -1851,13 +1975,23 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
     return parseExternalSymbolOperand(Dest);
   case MIToken::SubRegisterIndex:
     return parseSubRegisterIndexOperand(Dest);
+  case MIToken::md_diexpr:
   case MIToken::exclaim:
     return parseMetadataOperand(Dest);
   case MIToken::kw_cfi_same_value:
   case MIToken::kw_cfi_offset:
+  case MIToken::kw_cfi_rel_offset:
   case MIToken::kw_cfi_def_cfa_register:
   case MIToken::kw_cfi_def_cfa_offset:
+  case MIToken::kw_cfi_adjust_cfa_offset:
+  case MIToken::kw_cfi_escape:
   case MIToken::kw_cfi_def_cfa:
+  case MIToken::kw_cfi_register:
+  case MIToken::kw_cfi_remember_state:
+  case MIToken::kw_cfi_restore:
+  case MIToken::kw_cfi_restore_state:
+  case MIToken::kw_cfi_undefined:
+  case MIToken::kw_cfi_window_save:
     return parseCFIOperand(Dest);
   case MIToken::kw_blockaddress:
     return parseBlockAddressOperand(Dest);
@@ -1968,7 +2102,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
 bool MIParser::parseIRValue(const Value *&V) {
   switch (Token.kind()) {
   case MIToken::NamedIRValue: {
-    V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue());
+    V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
     break;
   }
   case MIToken::IRValue: {
@@ -2029,8 +2163,11 @@ bool MIParser::getHexUint(APInt &Result) {
     return true;
   StringRef V = S.substr(2);
   APInt A(V.size()*4, V, 16);
-  Result = APInt(A.getActiveBits(),
-                 ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
+
+  // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
+  // sure it isn't the case before constructing result.
+  unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
+  Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
   return false;
 }
 
@@ -2216,9 +2353,15 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
     Flags |= MachineMemOperand::MOStore;
   lex();
 
+  // Optional 'store' for operands that both load and store.
+  if (Token.is(MIToken::Identifier) && Token.stringValue() == "store") {
+    Flags |= MachineMemOperand::MOStore;
+    lex();
+  }
+
   // Optional synchronization scope.
   SyncScope::ID SSID;
-  if (parseOptionalScope(MF.getFunction()->getContext(), SSID))
+  if (parseOptionalScope(MF.getFunction().getContext(), SSID))
     return true;
 
   // Up to two atomic orderings (cmpxchg provides guarantees on failure).
@@ -2238,7 +2381,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
 
   MachinePointerInfo Ptr = MachinePointerInfo();
   if (Token.is(MIToken::Identifier)) {
-    const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
+    const char *Word =
+        ((Flags & MachineMemOperand::MOLoad) &&
+         (Flags & MachineMemOperand::MOStore))
+            ? "on"
+            : Flags & MachineMemOperand::MOLoad ? "from" : "into";
     if (Token.stringValue() != Word)
       return error(Twine("expected '") + Word + "'");
     lex();
@@ -2395,12 +2542,12 @@ static const BasicBlock *getIRBlockFromSlot(
 
 const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
   if (Slots2BasicBlocks.empty())
-    initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks);
+    initSlots2BasicBlocks(MF.getFunction(), Slots2BasicBlocks);
   return getIRBlockFromSlot(Slot, Slots2BasicBlocks);
 }
 
 const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
-  if (&F == MF.getFunction())
+  if (&F == &MF.getFunction())
     return getIRBlock(Slot);
   DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks;
   initSlots2BasicBlocks(F, CustomSlots2BasicBlocks);
@@ -2431,7 +2578,7 @@ static void initSlots2Values(const Function &F,
 
 const Value *MIParser::getIRValue(unsigned Slot) {
   if (Slots2Values.empty())
-    initSlots2Values(*MF.getFunction(), Slots2Values);
+    initSlots2Values(MF.getFunction(), Slots2Values);
   auto ValueInfo = Slots2Values.find(Slot);
   if (ValueInfo == Slots2Values.end())
     return nullptr;
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 78b57f357781..7d8e62736a34 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -120,7 +120,7 @@ public:
   bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
                                 std::vector<CalleeSavedInfo> &CSIInfo,
                                 const yaml::StringValue &RegisterSource,
-                                int FrameIdx);
+                                bool IsRestored, int FrameIdx);
 
   bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
                                   const yaml::MachineStackObject &Object,
@@ -214,6 +214,9 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
   case SourceMgr::DK_Note:
     Kind = DS_Note;
     break;
+  case SourceMgr::DK_Remark:
+    llvm_unreachable("remark unexpected");
+    break;
   }
   Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
 }
@@ -438,6 +441,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
 
     if (StringRef(VReg.Class.Value).equals("_")) {
       Info.Kind = VRegInfo::GENERIC;
+      Info.D.RegBank = nullptr;
     } else {
       const auto *RC = getRegClass(MF, VReg.Class.Value);
       if (RC) {
@@ -547,7 +551,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
                                         const yaml::MachineFunction &YamlMF) {
   MachineFunction &MF = PFS.MF;
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  const Function &F = *MF.getFunction();
+  const Function &F = MF.getFunction();
   const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
   MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
   MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
@@ -587,6 +591,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
     else
       ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
     MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+    MFI.setStackID(ObjectIdx, Object.StackID);
     if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
                                                          ObjectIdx))
              .second)
@@ -594,7 +599,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
                    Twine("redefinition of fixed stack object '%fixed-stack.") +
                        Twine(Object.ID.Value) + "'");
     if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
-                                 ObjectIdx))
+                                 Object.CalleeSavedRestored, ObjectIdx))
       return true;
   }
 
@@ -619,13 +624,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
           Object.Size, Object.Alignment,
           Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
     MFI.setObjectOffset(ObjectIdx, Object.Offset);
+    MFI.setStackID(ObjectIdx, Object.StackID);
+
     if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
              .second)
       return error(Object.ID.SourceRange.Start,
                    Twine("redefinition of stack object '%stack.") +
                        Twine(Object.ID.Value) + "'");
     if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
-                                 ObjectIdx))
+                                 Object.CalleeSavedRestored, ObjectIdx))
       return true;
     if (Object.LocalOffset)
       MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
@@ -650,14 +657,16 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
 
 bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
     std::vector<CalleeSavedInfo> &CSIInfo,
-    const yaml::StringValue &RegisterSource, int FrameIdx) {
+    const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) {
   if (RegisterSource.Value.empty())
     return false;
   unsigned Reg = 0;
   SMDiagnostic Error;
   if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error))
     return error(Error, RegisterSource.SourceRange);
-  CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx));
+  CalleeSavedInfo CSI(Reg, FrameIdx);
+  CSI.setRestored(IsRestored);
+  CSIInfo.push_back(CSI);
   return false;
 }
 
@@ -713,9 +722,13 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
     MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) {
   DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots;
   const MachineFunction &MF = PFS.MF;
-  const auto &M = *MF.getFunction()->getParent();
+  const auto &M = *MF.getFunction().getParent();
   SMDiagnostic Error;
   for (const auto &YamlConstant : YamlMF.Constants) {
+    if (YamlConstant.IsTargetSpecific)
+      // FIXME: Support target-specific constant pools
+      return error(YamlConstant.Value.SourceRange.Start,
+                   "Can't parse target-specific constant pool entries yet");
     const Constant *Value = dyn_cast_or_null<Constant>(
         parseConstantValue(YamlConstant.Value.Value, Error, M));
     if (!Value)
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index ddeacf1d1bfb..3568f96d2b9a 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -12,16 +12,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MIRPrinter.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -31,19 +33,20 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MIRPrinter.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Value.h"
@@ -57,13 +60,10 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/YAMLTraits.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cinttypes>
@@ -75,7 +75,8 @@
 
 using namespace llvm;
 
-static cl::opt<bool> SimplifyMIR("simplify-mir",
+static cl::opt<bool> SimplifyMIR(
+    "simplify-mir", cl::Hidden,
     cl::desc("Leave out unnecessary information when printing MIR"));
 
 namespace {
@@ -156,15 +157,13 @@ public:
   void print(const MachineBasicBlock &MBB);
 
   void print(const MachineInstr &MI);
-  void printMBBReference(const MachineBasicBlock &MBB);
   void printIRBlockReference(const BasicBlock &BB);
   void printIRValueReference(const Value &V);
   void printStackObjectReference(int FrameIndex);
   void printOffset(int64_t Offset);
-  void printTargetFlags(const MachineOperand &Op);
-  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
-             unsigned I, bool ShouldPrintRegisterTies,
-             LLT TypeToPrint, bool IsDef = false);
+  void print(const MachineInstr &MI, unsigned OpIdx,
+             const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,
+             LLT TypeToPrint, bool PrintDef = true);
   void print(const LLVMContext &Context, const TargetInstrInfo &TII,
              const MachineMemOperand &Op);
   void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
@@ -192,23 +191,10 @@ template <> struct BlockScalarTraits<Module> {
 } // end namespace yaml
 } // end namespace llvm
 
-static void printReg(unsigned Reg, raw_ostream &OS,
-                     const TargetRegisterInfo *TRI) {
-  // TODO: Print Stack Slots.
-  if (!Reg)
-    OS << '_';
-  else if (TargetRegisterInfo::isVirtualRegister(Reg))
-    OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
-  else if (Reg < TRI->getNumRegs())
-    OS << '%' << StringRef(TRI->getName(Reg)).lower();
-  else
-    llvm_unreachable("Can't print this kind of register yet");
-}
-
-static void printReg(unsigned Reg, yaml::StringValue &Dest,
-                     const TargetRegisterInfo *TRI) {
+static void printRegMIR(unsigned Reg, yaml::StringValue &Dest,
+                        const TargetRegisterInfo *TRI) {
   raw_string_ostream OS(Dest.Value);
-  printReg(Reg, OS, TRI);
+  OS << printReg(Reg, TRI);
 }
 
 void MIRPrinter::print(const MachineFunction &MF) {
@@ -227,8 +213,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
       MachineFunctionProperties::Property::Selected);
 
   convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
-  ModuleSlotTracker MST(MF.getFunction()->getParent());
-  MST.incorporateFunction(*MF.getFunction());
+  ModuleSlotTracker MST(MF.getFunction().getParent());
+  MST.incorporateFunction(MF.getFunction());
   convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
   convertStackObjects(YamlMF, MF, MST);
   if (const auto *ConstantPool = MF.getConstantPool())
@@ -262,7 +248,7 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
     if (RegMask[I / 32] & (1u << (I % 32))) {
       if (IsRegInRegMaskFound)
         OS << ',';
-      printReg(I, OS, TRI);
+      OS << printReg(I, TRI);
       IsRegInRegMaskFound = true;
     }
   }
@@ -270,6 +256,14 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
   OS << ')';
 }
 
+static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest,
+                                const MachineRegisterInfo &RegInfo,
+                                const TargetRegisterInfo *TRI) {
+  raw_string_ostream OS(Dest.Value);
+  OS << printRegClassOrBank(Reg, RegInfo, TRI);
+}
+
+
 void MIRPrinter::convert(yaml::MachineFunction &MF,
                          const MachineRegisterInfo &RegInfo,
                          const TargetRegisterInfo *TRI) {
@@ -280,28 +274,19 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
     unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
     yaml::VirtualRegisterDefinition VReg;
     VReg.ID = I;
-    if (RegInfo.getRegClassOrNull(Reg))
-      VReg.Class =
-          StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
-    else if (RegInfo.getRegBankOrNull(Reg))
-      VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
-    else {
-      VReg.Class = std::string("_");
-      assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
-             "Generic registers must have a valid type");
-    }
+    ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
     unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
     if (PreferredReg)
-      printReg(PreferredReg, VReg.PreferredRegister, TRI);
+      printRegMIR(PreferredReg, VReg.PreferredRegister, TRI);
     MF.VirtualRegisters.push_back(VReg);
   }
 
   // Print the live ins.
-  for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+  for (std::pair<unsigned, unsigned> LI : RegInfo.liveins()) {
     yaml::MachineFunctionLiveIn LiveIn;
-    printReg(I->first, LiveIn.Register, TRI);
-    if (I->second)
-      printReg(I->second, LiveIn.VirtualRegister, TRI);
+    printRegMIR(LI.first, LiveIn.Register, TRI);
+    if (LI.second)
+      printRegMIR(LI.second, LiveIn.VirtualRegister, TRI);
     MF.LiveIns.push_back(LiveIn);
   }
 
@@ -311,7 +296,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
     std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
     for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) {
       yaml::FlowStringValue Reg;
-      printReg(*I, Reg, TRI);
+      printRegMIR(*I, Reg, TRI);
       CalleeSavedRegisters.push_back(Reg);
     }
     MF.CalleeSavedRegisters = CalleeSavedRegisters;
@@ -337,13 +322,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
   YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
   if (MFI.getSavePoint()) {
     raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
-    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
-        .printMBBReference(*MFI.getSavePoint());
+    StrOS << printMBBReference(*MFI.getSavePoint());
   }
   if (MFI.getRestorePoint()) {
     raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
-    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
-        .printMBBReference(*MFI.getRestorePoint());
+    StrOS << printMBBReference(*MFI.getRestorePoint());
   }
 }
 
@@ -366,6 +349,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
     YamlObject.Offset = MFI.getObjectOffset(I);
     YamlObject.Size = MFI.getObjectSize(I);
     YamlObject.Alignment = MFI.getObjectAlignment(I);
+    YamlObject.StackID = MFI.getStackID(I);
     YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
     YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
     YMF.FixedStackObjects.push_back(YamlObject);
@@ -392,6 +376,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
     YamlObject.Offset = MFI.getObjectOffset(I);
     YamlObject.Size = MFI.getObjectSize(I);
     YamlObject.Alignment = MFI.getObjectAlignment(I);
+    YamlObject.StackID = MFI.getStackID(I);
 
     YMF.StackObjects.push_back(YamlObject);
     StackObjectOperandMapping.insert(std::make_pair(
@@ -400,15 +385,20 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
 
   for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
     yaml::StringValue Reg;
-    printReg(CSInfo.getReg(), Reg, TRI);
+    printRegMIR(CSInfo.getReg(), Reg, TRI);
     auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
     assert(StackObjectInfo != StackObjectOperandMapping.end() &&
            "Invalid stack object index");
     const FrameIndexOperand &StackObject = StackObjectInfo->second;
-    if (StackObject.IsFixed)
+    if (StackObject.IsFixed) {
       YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
-    else
+      YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
+        CSInfo.isRestored();
+    } else {
       YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+      YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
+        CSInfo.isRestored();
+    }
   }
   for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
     auto LocalObject = MFI.getLocalFrameObjectMap(I);
@@ -456,17 +446,20 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
                          const MachineConstantPool &ConstantPool) {
   unsigned ID = 0;
   for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
-    // TODO: Serialize target specific constant pool entries.
-    if (Constant.isMachineConstantPoolEntry())
-      llvm_unreachable("Can't print target specific constant pool entries yet");
-
-    yaml::MachineConstantPoolValue YamlConstant;
     std::string Str;
     raw_string_ostream StrOS(Str);
-    Constant.Val.ConstVal->printAsOperand(StrOS);
+    if (Constant.isMachineConstantPoolEntry()) {
+      Constant.Val.MachineCPVal->print(StrOS);
+    } else {
+      Constant.Val.ConstVal->printAsOperand(StrOS);
+    }
+
+    yaml::MachineConstantPoolValue YamlConstant;
     YamlConstant.ID = ID++;
     YamlConstant.Value = StrOS.str();
     YamlConstant.Alignment = Constant.getAlignment();
+    YamlConstant.IsTargetSpecific = Constant.isMachineConstantPoolEntry();
+
     MF.Constants.push_back(YamlConstant);
   }
 }
@@ -482,8 +475,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
     Entry.ID = ID++;
     for (const auto *MBB : Table.MBBs) {
       raw_string_ostream StrOS(Str);
-      MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
-          .printMBBReference(*MBB);
+      StrOS << printMBBReference(*MBB);
       Entry.Blocks.push_back(StrOS.str());
       Str.clear();
     }
@@ -593,13 +585,19 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
   bool HasLineAttributes = false;
   // Print the successors
   bool canPredictProbs = canPredictBranchProbabilities(MBB);
-  if (!MBB.succ_empty() && (!SimplifyMIR || !canPredictProbs ||
-                            !canPredictSuccessors(MBB))) {
+  // Even if the list of successors is empty, if we cannot guess it,
+  // we need to print it to tell the parser that the list is empty.
+  // This is needed, because MI model unreachable as empty blocks
+  // with an empty successor list. If the parser would see that
+  // without the successor list, it would guess the code would
+  // fallthrough.
+  if ((!MBB.succ_empty() && !SimplifyMIR) || !canPredictProbs ||
+      !canPredictSuccessors(MBB)) {
     OS.indent(2) << "successors: ";
     for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
       if (I != MBB.succ_begin())
         OS << ", ";
-      printMBBReference(**I);
+      OS << printMBBReference(**I);
       if (!SimplifyMIR || !canPredictProbs)
         OS << '('
            << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator())
@@ -619,7 +617,7 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
       if (!First)
         OS << ", ";
       First = false;
-      printReg(LI.PhysReg, OS, &TRI);
+      OS << printReg(LI.PhysReg, &TRI);
       if (!LI.LaneMask.all())
         OS << ":0x" << PrintLaneMask(LI.LaneMask);
     }
@@ -648,46 +646,8 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
     OS.indent(2) << "}\n";
 }
 
-/// Return true when an instruction has tied register that can't be determined
-/// by the instruction's descriptor.
-static bool hasComplexRegisterTies(const MachineInstr &MI) {
-  const MCInstrDesc &MCID = MI.getDesc();
-  for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
-    const auto &Operand = MI.getOperand(I);
-    if (!Operand.isReg() || Operand.isDef())
-      // Ignore the defined registers as MCID marks only the uses as tied.
-      continue;
-    int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
-    int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
-    if (ExpectedTiedIdx != TiedIdx)
-      return true;
-  }
-  return false;
-}
-
-static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx,
-                          SmallBitVector &PrintedTypes,
-                          const MachineRegisterInfo &MRI) {
-  const MachineOperand &Op = MI.getOperand(OpIdx);
-  if (!Op.isReg())
-    return LLT{};
-
-  if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands())
-    return MRI.getType(Op.getReg());
-
-  auto &OpInfo = MI.getDesc().OpInfo[OpIdx];
-  if (!OpInfo.isGenericType())
-    return MRI.getType(Op.getReg());
-
-  if (PrintedTypes[OpInfo.getGenericTypeIndex()])
-    return LLT{};
-
-  PrintedTypes.set(OpInfo.getGenericTypeIndex());
-  return MRI.getType(Op.getReg());
-}
-
 void MIPrinter::print(const MachineInstr &MI) {
-  const auto *MF = MI.getParent()->getParent();
+  const auto *MF = MI.getMF();
   const auto &MRI = MF->getRegInfo();
   const auto &SubTarget = MF->getSubtarget();
   const auto *TRI = SubTarget.getRegisterInfo();
@@ -698,16 +658,16 @@ void MIPrinter::print(const MachineInstr &MI) {
     assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
 
   SmallBitVector PrintedTypes(8);
-  bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
+  bool ShouldPrintRegisterTies = MI.hasComplexRegisterTies();
   unsigned I = 0, E = MI.getNumOperands();
   for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
          !MI.getOperand(I).isImplicit();
        ++I) {
     if (I)
       OS << ", ";
-    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
-          getTypeToPrint(MI, I, PrintedTypes, MRI),
-          /*IsDef=*/true);
+    print(MI, I, TRI, ShouldPrintRegisterTies,
+          MI.getTypeToPrint(I, PrintedTypes, MRI),
+          /*PrintDef=*/false);
   }
 
   if (I)
@@ -722,8 +682,8 @@ void MIPrinter::print(const MachineInstr &MI) {
   for (; I < E; ++I) {
     if (NeedComma)
       OS << ", ";
-    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
-          getTypeToPrint(MI, I, PrintedTypes, MRI));
+    print(MI, I, TRI, ShouldPrintRegisterTies,
+          MI.getTypeToPrint(I, PrintedTypes, MRI));
     NeedComma = true;
   }
 
@@ -736,7 +696,7 @@ void MIPrinter::print(const MachineInstr &MI) {
 
   if (!MI.memoperands_empty()) {
     OS << " :: ";
-    const LLVMContext &Context = MF->getFunction()->getContext();
+    const LLVMContext &Context = MF->getFunction().getContext();
     bool NeedComma = false;
     for (const auto *Op : MI.memoperands()) {
       if (NeedComma)
@@ -747,14 +707,6 @@ void MIPrinter::print(const MachineInstr &MI) {
   }
 }
 
-void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
-  OS << "%bb." << MBB.getNumber();
-  if (const auto *BB = MBB.getBasicBlock()) {
-    if (BB->hasName())
-      OS << '.' << BB->getName();
-  }
-}
-
 static void printIRSlotNumber(raw_ostream &OS, int Slot) {
   if (Slot == -1)
     OS << "<badref>";
@@ -806,13 +758,8 @@ void MIPrinter::printStackObjectReference(int FrameIndex) {
   assert(ObjectInfo != StackObjectOperandMapping.end() &&
          "Invalid frame index");
   const FrameIndexOperand &Operand = ObjectInfo->second;
-  if (Operand.IsFixed) {
-    OS << "%fixed-stack." << Operand.ID;
-    return;
-  }
-  OS << "%stack." << Operand.ID;
-  if (!Operand.Name.empty())
-    OS << '.' << Operand.Name;
+  MachineOperand::printStackObjectReference(OS, Operand.ID, Operand.IsFixed,
+                                            Operand.Name);
 }
 
 void MIPrinter::printOffset(int64_t Offset) {
@@ -825,154 +772,43 @@ void MIPrinter::printOffset(int64_t Offset) {
   OS << " + " << Offset;
 }
 
-static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
-  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
-  for (const auto &I : Flags) {
-    if (I.first == TF) {
-      return I.second;
-    }
-  }
-  return nullptr;
-}
-
-void MIPrinter::printTargetFlags(const MachineOperand &Op) {
-  if (!Op.getTargetFlags())
-    return;
-  const auto *TII =
-      Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
-  assert(TII && "expected instruction info");
-  auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
-  OS << "target-flags(";
-  const bool HasDirectFlags = Flags.first;
-  const bool HasBitmaskFlags = Flags.second;
-  if (!HasDirectFlags && !HasBitmaskFlags) {
-    OS << "<unknown>) ";
-    return;
-  }
-  if (HasDirectFlags) {
-    if (const auto *Name = getTargetFlagName(TII, Flags.first))
-      OS << Name;
-    else
-      OS << "<unknown target flag>";
-  }
-  if (!HasBitmaskFlags) {
-    OS << ") ";
-    return;
-  }
-  bool IsCommaNeeded = HasDirectFlags;
-  unsigned BitMask = Flags.second;
-  auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
-  for (const auto &Mask : BitMasks) {
-    // Check if the flag's bitmask has the bits of the current mask set.
-    if ((BitMask & Mask.first) == Mask.first) {
-      if (IsCommaNeeded)
-        OS << ", ";
-      IsCommaNeeded = true;
-      OS << Mask.second;
-      // Clear the bits which were serialized from the flag's bitmask.
-      BitMask &= ~(Mask.first);
-    }
-  }
-  if (BitMask) {
-    // When the resulting flag's bitmask isn't zero, we know that we didn't
-    // serialize all of the bit flags.
-    if (IsCommaNeeded)
-      OS << ", ";
-    OS << "<unknown bitmask target flag>";
-  }
-  OS << ") ";
-}
-
-static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
-  const auto *TII = MF.getSubtarget().getInstrInfo();
-  assert(TII && "expected instruction info");
-  auto Indices = TII->getSerializableTargetIndices();
-  for (const auto &I : Indices) {
-    if (I.first == Index) {
-      return I.second;
-    }
-  }
-  return nullptr;
-}
-
-void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
-                      unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint,
-                      bool IsDef) {
-  printTargetFlags(Op);
+void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
+                      const TargetRegisterInfo *TRI,
+                      bool ShouldPrintRegisterTies, LLT TypeToPrint,
+                      bool PrintDef) {
+  const MachineOperand &Op = MI.getOperand(OpIdx);
   switch (Op.getType()) {
-  case MachineOperand::MO_Register:
-    if (Op.isImplicit())
-      OS << (Op.isDef() ? "implicit-def " : "implicit ");
-    else if (!IsDef && Op.isDef())
-      // Print the 'def' flag only when the operand is defined after '='.
-      OS << "def ";
-    if (Op.isInternalRead())
-      OS << "internal ";
-    if (Op.isDead())
-      OS << "dead ";
-    if (Op.isKill())
-      OS << "killed ";
-    if (Op.isUndef())
-      OS << "undef ";
-    if (Op.isEarlyClobber())
-      OS << "early-clobber ";
-    if (Op.isDebug())
-      OS << "debug-use ";
-    printReg(Op.getReg(), OS, TRI);
-    // Print the sub register.
-    if (Op.getSubReg() != 0)
-      OS << '.' << TRI->getSubRegIndexName(Op.getSubReg());
-    if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
-      OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
-    if (TypeToPrint.isValid())
-      OS << '(' << TypeToPrint << ')';
-    break;
   case MachineOperand::MO_Immediate:
-    OS << Op.getImm();
-    break;
+    if (MI.isOperandSubregIdx(OpIdx)) {
+      MachineOperand::printTargetFlags(OS, Op);
+      MachineOperand::printSubregIdx(OS, Op.getImm(), TRI);
+      break;
+    }
+    LLVM_FALLTHROUGH;
+  case MachineOperand::MO_Register:
   case MachineOperand::MO_CImmediate:
-    Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
-    break;
-  case MachineOperand::MO_FPImmediate:
-    Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
-    break;
   case MachineOperand::MO_MachineBasicBlock:
-    printMBBReference(*Op.getMBB());
-    break;
-  case MachineOperand::MO_FrameIndex:
-    printStackObjectReference(Op.getIndex());
-    break;
   case MachineOperand::MO_ConstantPoolIndex:
-    OS << "%const." << Op.getIndex();
-    printOffset(Op.getOffset());
-    break;
   case MachineOperand::MO_TargetIndex:
-    OS << "target-index(";
-    if (const auto *Name = getTargetIndexName(
-            *Op.getParent()->getParent()->getParent(), Op.getIndex()))
-      OS << Name;
-    else
-      OS << "<unknown>";
-    OS << ')';
-    printOffset(Op.getOffset());
-    break;
   case MachineOperand::MO_JumpTableIndex:
-    OS << "%jump-table." << Op.getIndex();
-    break;
-  case MachineOperand::MO_ExternalSymbol: {
-    StringRef Name = Op.getSymbolName();
-    OS << '$';
-    if (Name.empty()) {
-      OS << "\"\"";
-    } else {
-      printLLVMNameWithoutPrefix(OS, Name);
-    }
-    printOffset(Op.getOffset());
+  case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_GlobalAddress:
+  case MachineOperand::MO_RegisterLiveOut:
+  case MachineOperand::MO_Metadata:
+  case MachineOperand::MO_MCSymbol: {
+    unsigned TiedOperandIdx = 0;
+    if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
+      TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
+    const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
+    Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies,
+             TiedOperandIdx, TRI, TII);
     break;
   }
-  case MachineOperand::MO_GlobalAddress:
-    Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
-    printOffset(Op.getOffset());
+  case MachineOperand::MO_FPImmediate:
+    Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+    break;
+  case MachineOperand::MO_FrameIndex:
+    printStackObjectReference(Op.getIndex());
     break;
   case MachineOperand::MO_BlockAddress:
     OS << "blockaddress(";
@@ -991,29 +827,8 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
       printCustomRegMask(Op.getRegMask(), OS, TRI);
     break;
   }
-  case MachineOperand::MO_RegisterLiveOut: {
-    const uint32_t *RegMask = Op.getRegLiveOut();
-    OS << "liveout(";
-    bool IsCommaNeeded = false;
-    for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
-      if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
-        if (IsCommaNeeded)
-          OS << ", ";
-        printReg(Reg, OS, TRI);
-        IsCommaNeeded = true;
-      }
-    }
-    OS << ")";
-    break;
-  }
-  case MachineOperand::MO_Metadata:
-    Op.getMetadata()->printAsOperand(OS, MST);
-    break;
-  case MachineOperand::MO_MCSymbol:
-    OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
-    break;
   case MachineOperand::MO_CFIIndex: {
-    const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+    const MachineFunction &MF = *Op.getParent()->getMF();
     print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI);
     break;
   }
@@ -1022,7 +837,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
     if (ID < Intrinsic::num_intrinsics)
       OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
     else {
-      const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+      const MachineFunction &MF = *Op.getParent()->getMF();
       const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
       OS << "intrinsic(@" << TII->getName(ID) << ')';
     }
@@ -1068,12 +883,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
   if (Op.getFlags() & MachineMemOperand::MOTargetFlag3)
     OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3)
        << "\" ";
+
+  assert((Op.isLoad() || Op.isStore()) && "machine memory operand must be a load or store (or both)");
   if (Op.isLoad())
     OS << "load ";
-  else {
-    assert(Op.isStore() && "Non load machine operand must be a store");
+  if (Op.isStore())
     OS << "store ";
-  }
 
   printSyncScope(Context, Op.getSyncScopeID());
 
@@ -1084,10 +899,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
 
   OS << Op.getSize();
   if (const Value *Val = Op.getValue()) {
-    OS << (Op.isLoad() ? " from " : " into ");
+    OS << ((Op.isLoad() && Op.isStore()) ? " on "
+                                         : Op.isLoad() ? " from " : " into ");
     printIRValueReference(*Val);
   } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) {
-    OS << (Op.isLoad() ? " from " : " into ");
+    OS << ((Op.isLoad() && Op.isStore()) ? " on "
+                                         : Op.isLoad() ? " from " : " into ");
     assert(PVal && "Expected a pseudo source value");
     switch (PVal->kind()) {
     case PseudoSourceValue::Stack:
@@ -1168,7 +985,7 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
     OS << "<badreg>";
     return;
   }
-  printReg(Reg, OS, TRI);
+  OS << printReg(Reg, TRI);
 }
 
 void MIPrinter::print(const MCCFIInstruction &CFI,
@@ -1176,36 +993,96 @@ void MIPrinter::print(const MCCFIInstruction &CFI,
   switch (CFI.getOperation()) {
   case MCCFIInstruction::OpSameValue:
     OS << "same_value ";
-    if (CFI.getLabel())
-      OS << "<mcsymbol> ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
     printCFIRegister(CFI.getRegister(), OS, TRI);
     break;
+  case MCCFIInstruction::OpRememberState:
+    OS << "remember_state ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    break;
+  case MCCFIInstruction::OpRestoreState:
+    OS << "restore_state ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    break;
   case MCCFIInstruction::OpOffset:
     OS << "offset ";
-    if (CFI.getLabel())
-      OS << "<mcsymbol> ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
     printCFIRegister(CFI.getRegister(), OS, TRI);
     OS << ", " << CFI.getOffset();
     break;
   case MCCFIInstruction::OpDefCfaRegister:
     OS << "def_cfa_register ";
-    if (CFI.getLabel())
-      OS << "<mcsymbol> ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
     printCFIRegister(CFI.getRegister(), OS, TRI);
     break;
   case MCCFIInstruction::OpDefCfaOffset:
     OS << "def_cfa_offset ";
-    if (CFI.getLabel())
-      OS << "<mcsymbol> ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
     OS << CFI.getOffset();
     break;
   case MCCFIInstruction::OpDefCfa:
     OS << "def_cfa ";
-    if (CFI.getLabel())
-      OS << "<mcsymbol> ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
     printCFIRegister(CFI.getRegister(), OS, TRI);
     OS << ", " << CFI.getOffset();
     break;
+  case MCCFIInstruction::OpRelOffset:
+    OS << "rel_offset ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    OS << ", " << CFI.getOffset();
+    break;
+  case MCCFIInstruction::OpAdjustCfaOffset:
+    OS << "adjust_cfa_offset ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    OS << CFI.getOffset();
+    break;
+  case MCCFIInstruction::OpRestore:
+    OS << "restore ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpEscape: {
+    OS << "escape ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    if (!CFI.getValues().empty()) {
+      size_t e = CFI.getValues().size() - 1;
+      for (size_t i = 0; i < e; ++i)
+        OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", ";
+      OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", ";
+    }
+    break;
+  }
+  case MCCFIInstruction::OpUndefined:
+    OS << "undefined ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpRegister:
+    OS << "register ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    OS << ", ";
+    printCFIRegister(CFI.getRegister2(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpWindowSave:
+    OS << "window_save ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    break;
   default:
     // TODO: Print the other CFI Operations.
     OS << "<unserializable cfi operation>";
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 09354cf70c3c..1a8427430ea0 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/CodeGen/MIRPrinter.h"
 
-#include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 81597afe6b02..209abf34d885 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -13,7 +13,7 @@
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -21,6 +21,9 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -30,10 +33,7 @@
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -42,6 +42,8 @@ using namespace llvm;
 MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
     : BB(B), Number(-1), xParent(&MF) {
   Insts.Parent = this;
+  if (B)
+    IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
 }
 
 MachineBasicBlock::~MachineBasicBlock() {
@@ -68,6 +70,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
   return OS;
 }
 
+Printable llvm::printMBBReference(const MachineBasicBlock &MBB) {
+  return Printable([&MBB](raw_ostream &OS) { return MBB.printAsOperand(OS); });
+}
+
 /// When an MBB is added to an MF, we need to update the parent pointer of the
 /// MBB, the MBB numbering, and any instructions in the MBB to be on the right
 /// operand list for registers.
@@ -111,7 +117,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
   assert(N->getParent() && "machine instruction not in a basic block");
 
   // Remove from the use/def lists.
-  if (MachineFunction *MF = N->getParent()->getParent())
+  if (MachineFunction *MF = N->getMF())
     N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
 
   N->setParent(nullptr);
@@ -261,8 +267,8 @@ void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes)
        << " is null\n";
     return;
   }
-  const Function *F = MF->getFunction();
-  const Module *M = F ? F->getParent() : nullptr;
+  const Function &F = MF->getFunction();
+  const Module *M = F.getParent();
   ModuleSlotTracker MST(M);
   print(OS, MST, Indexes);
 }
@@ -279,7 +285,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
   if (Indexes)
     OS << Indexes->getMBBStartIdx(this) << '\t';
 
-  OS << "BB#" << getNumber() << ": ";
+  OS << printMBBReference(*this) << ": ";
 
   const char *Comma = "";
   if (const BasicBlock *LBB = getBasicBlock()) {
@@ -300,7 +306,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     if (Indexes) OS << '\t';
     OS << "    Live Ins:";
     for (const auto &LI : LiveIns) {
-      OS << ' ' << PrintReg(LI.PhysReg, TRI);
+      OS << ' ' << printReg(LI.PhysReg, TRI);
       if (!LI.LaneMask.all())
         OS << ':' << PrintLaneMask(LI.LaneMask);
     }
@@ -311,7 +317,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     if (Indexes) OS << '\t';
     OS << "    Predecessors according to CFG:";
     for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
-      OS << " BB#" << (*PI)->getNumber();
+      OS << " " << printMBBReference(*(*PI));
     OS << '\n';
   }
 
@@ -332,17 +338,23 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     if (Indexes) OS << '\t';
     OS << "    Successors according to CFG:";
     for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
-      OS << " BB#" << (*SI)->getNumber();
+      OS << " " << printMBBReference(*(*SI));
       if (!Probs.empty())
         OS << '(' << *getProbabilityIterator(SI) << ')';
     }
     OS << '\n';
   }
+  if (IrrLoopHeaderWeight) {
+    if (Indexes) OS << '\t';
+    OS << "    Irreducible loop header weight: "
+       << IrrLoopHeaderWeight.getValue();
+    OS << '\n';
+  }
 }
 
 void MachineBasicBlock::printAsOperand(raw_ostream &OS,
                                        bool /*PrintType*/) const {
-  OS << "BB#" << getNumber();
+  OS << "%bb." << getNumber();
 }
 
 void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
@@ -759,10 +771,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
 
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
   MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
-  DEBUG(dbgs() << "Splitting critical edge:"
-        " BB#" << getNumber()
-        << " -- BB#" << NMBB->getNumber()
-        << " -- BB#" << Succ->getNumber() << '\n');
+  DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
+               << " -- " << printMBBReference(*NMBB) << " -- "
+               << printMBBReference(*Succ) << '\n');
 
   LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
   SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
@@ -1015,8 +1026,8 @@ bool MachineBasicBlock::canSplitCriticalEdge(
   // case that we can't handle. Since this never happens in properly optimized
   // code, just skip those edges.
   if (TBB && TBB == FBB) {
-    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
-                 << getNumber() << '\n');
+    DEBUG(dbgs() << "Won't split critical edge after degenerate "
+                 << printMBBReference(*this) << '\n');
     return false;
   }
   return true;
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 4d1ec11df46c..3459a9f71a73 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -12,23 +12,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
 #include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
+#include <string>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "machine-block-freq"
 
-
 static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
     "view-machine-block-freq-propagation-dags", cl::Hidden,
     cl::desc("Pop up a window to show a dag displaying how machine block "
@@ -42,6 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
                           "integer fractional block frequency representation."),
                clEnumValN(GVDT_Count, "count", "display a graph using the real "
                                                "profile count if available.")));
+
 // Similar option above, but used to control BFI display only after MBP pass
 cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
     "view-block-layout-with-bfi", cl::Hidden,
@@ -62,10 +63,19 @@ cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
 // Command line option to specify the name of the function for CFG dump
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
 extern cl::opt<std::string> ViewBlockFreqFuncName;
+
 // Command line option to specify hot frequency threshold.
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-hot-freq-perc=
 extern cl::opt<unsigned> ViewHotFreqPercent;
 
+static cl::opt<bool> PrintMachineBlockFreq(
+    "print-machine-bfi", cl::init(false), cl::Hidden,
+    cl::desc("Print the machine block frequency info."));
+
+// Command line option to specify the name of the function for block frequency
+// dump. Defined in Analysis/BlockFrequencyInfo.cpp.
+extern cl::opt<std::string> PrintBlockFreqFuncName;
+
 static GVDAGType getGVDT() {
   if (ViewBlockLayoutWithBFI != GVDT_None)
     return ViewBlockLayoutWithBFI;
@@ -76,9 +86,9 @@ static GVDAGType getGVDT() {
 namespace llvm {
 
 template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
-  typedef const MachineBasicBlock *NodeRef;
-  typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
-  typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator;
+  using NodeRef = const MachineBasicBlock *;
+  using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
+  using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
 
   static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) {
     return &G->getFunction()->front();
@@ -99,21 +109,21 @@ template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
   }
 };
 
-typedef BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
-                              MachineBranchProbabilityInfo>
-    MBFIDOTGraphTraitsBase;
+using MBFIDOTGraphTraitsBase =
+    BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
+                          MachineBranchProbabilityInfo>;
+
 template <>
 struct DOTGraphTraits<MachineBlockFrequencyInfo *>
     : public MBFIDOTGraphTraitsBase {
-  explicit DOTGraphTraits(bool isSimple = false)
-      : MBFIDOTGraphTraitsBase(isSimple), CurFunc(nullptr), LayoutOrderMap() {}
-
-  const MachineFunction *CurFunc;
+  const MachineFunction *CurFunc = nullptr;
   DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
 
+  explicit DOTGraphTraits(bool isSimple = false)
+      : MBFIDOTGraphTraitsBase(isSimple) {}
+
   std::string getNodeLabel(const MachineBasicBlock *Node,
                            const MachineBlockFrequencyInfo *Graph) {
-
     int layout_order = -1;
     // Attach additional ordering information if 'isSimple' is false.
     if (!isSimple()) {
@@ -163,7 +173,7 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
   initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
 }
 
-MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {}
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
 
 void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineBranchProbabilityInfo>();
@@ -183,6 +193,11 @@ void MachineBlockFrequencyInfo::calculate(
        F.getName().equals(ViewBlockFreqFuncName))) {
     view("MachineBlockFrequencyDAGS." + F.getName());
   }
+  if (PrintMachineBlockFreq &&
+      (PrintBlockFreqFuncName.empty() ||
+       F.getName().equals(PrintBlockFreqFuncName))) {
+    MBFI->print(dbgs());
+  }
 }
 
 bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
@@ -209,14 +224,20 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
 
 Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
     const MachineBasicBlock *MBB) const {
-  const Function *F = MBFI->getFunction()->getFunction();
-  return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None;
+  const Function &F = MBFI->getFunction()->getFunction();
+  return MBFI ? MBFI->getBlockProfileCount(F, MBB) : None;
 }
 
 Optional<uint64_t>
 MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
-  const Function *F = MBFI->getFunction()->getFunction();
-  return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None;
+  const Function &F = MBFI->getFunction()->getFunction();
+  return MBFI ? MBFI->getProfileCountFromFreq(F, Freq) : None;
+}
+
+bool
+MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
+  assert(MBFI && "Expected analysis to be available");
+  return MBFI->isIrrLoopHeader(MBB);
 }
 
 const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 447ad629885b..4ce689607730 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1,4 +1,4 @@
-//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -26,7 +26,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "BranchFolding.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -39,19 +42,33 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
-#include <functional>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <tuple>
 #include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "block-placement"
@@ -91,11 +108,17 @@ static cl::opt<unsigned> LoopToColdBlockRatio(
              "(frequency of block) is greater than this ratio"),
     cl::init(5), cl::Hidden);
 
+static cl::opt<bool> ForceLoopColdBlock(
+    "force-loop-cold-block",
+    cl::desc("Force outlining cold blocks from loops."),
+    cl::init(false), cl::Hidden);
+
 static cl::opt<bool>
     PreciseRotationCost("precise-rotation-cost",
                         cl::desc("Model the cost of loop rotation more "
                                  "precisely by using profile data."),
                         cl::init(false), cl::Hidden);
+
 static cl::opt<bool>
     ForcePreciseRotationCost("force-precise-rotation-cost",
                              cl::desc("Force the use of precise cost "
@@ -138,7 +161,7 @@ static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
     "tail-dup-placement-aggressive-threshold",
     cl::desc("Instruction cutoff for aggressive tail duplication during "
              "layout. Used at -O3. Tail merging during layout is forced to "
-             "have a threshold that won't conflict."), cl::init(3),
+             "have a threshold that won't conflict."), cl::init(4),
     cl::Hidden);
 
 // Heuristic for tail duplication.
@@ -172,12 +195,12 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
 extern cl::opt<std::string> ViewBlockFreqFuncName;
 
 namespace {
+
 class BlockChain;
+
 /// \brief Type for our function-wide basic block -> block chain mapping.
-typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType;
-}
+using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;
 
-namespace {
 /// \brief A chain of blocks which will be laid out contiguously.
 ///
 /// This is the datastructure representing a chain of consecutive blocks that
@@ -211,14 +234,14 @@ public:
   /// function. It also registers itself as the chain that block participates
   /// in with the BlockToChain mapping.
   BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
-      : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
+      : Blocks(1, BB), BlockToChain(BlockToChain) {
     assert(BB && "Cannot create a chain with a null basic block");
     BlockToChain[BB] = this;
   }
 
   /// \brief Iterator over blocks within the chain.
-  typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
-  typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator;
+  using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;
+  using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator;
 
   /// \brief Beginning of blocks within the chain.
   iterator begin() { return Blocks.begin(); }
@@ -286,14 +309,12 @@ public:
   ///
   /// Note: This field is reinitialized multiple times - once for each loop,
   /// and then once for the function as a whole.
-  unsigned UnscheduledPredecessors;
+  unsigned UnscheduledPredecessors = 0;
 };
-}
 
-namespace {
 class MachineBlockPlacement : public MachineFunctionPass {
-  /// \brief A typedef for a block filter set.
-  typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet;
+  /// \brief A type for a block filter set.
+  using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
 
   /// Pair struct containing basic block and taildup profitiability
   struct BlockAndTailDupResult {
@@ -428,6 +449,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
   void fillWorkLists(const MachineBasicBlock *MBB,
                      SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
                      const BlockFilterSet *BlockFilter);
+
   void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
                   BlockFilterSet *BlockFilter = nullptr);
   MachineBasicBlock *findBestLoopTop(
@@ -454,31 +476,37 @@ class MachineBlockPlacement : public MachineFunctionPass {
     const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
     BranchProbability AdjustedSumProb,
     const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
   /// Check for a trellis layout.
   bool isTrellis(const MachineBasicBlock *BB,
                  const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
                  const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
   /// Get the best successor given a trellis layout.
   BlockAndTailDupResult getBestTrellisSuccessor(
       const MachineBasicBlock *BB,
       const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
       BranchProbability AdjustedSumProb, const BlockChain &Chain,
       const BlockFilterSet *BlockFilter);
+
   /// Get the best pair of non-conflicting edges.
   static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
       const MachineBasicBlock *BB,
       MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+
   /// Returns true if a block can tail duplicate into all unplaced
   /// predecessors. Filters based on loop.
   bool canTailDuplicateUnplacedPreds(
       const MachineBasicBlock *BB, MachineBasicBlock *Succ,
       const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+
   /// Find chains of triangles to tail-duplicate where a global analysis works,
   /// but a local analysis would not find them.
   void precomputeTriangleChains();
 
 public:
   static char ID; // Pass identification, replacement for typeid
+
   MachineBlockPlacement() : MachineFunctionPass(ID) {
     initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
   }
@@ -495,10 +523,13 @@ public:
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
-}
+
+} // end anonymous namespace
 
 char MachineBlockPlacement::ID = 0;
+
 char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+
 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
                       "Branch Probability Basic Block Placement", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
@@ -515,7 +546,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
 static std::string getBlockName(const MachineBasicBlock *BB) {
   std::string Result;
   raw_string_ostream OS(Result);
-  OS << "BB#" << BB->getNumber();
+  OS << printMBBReference(*BB);
   OS << " ('" << BB->getName() << "')";
   OS.flush();
   return Result;
@@ -1094,6 +1125,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
 void MachineBlockPlacement::precomputeTriangleChains() {
   struct TriangleChain {
     std::vector<MachineBasicBlock *> Edges;
+
     TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
         : Edges({src, dst}) {}
 
@@ -1203,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {
 // When profile is available, we need to handle the triangle-shape CFG.
 static BranchProbability getLayoutSuccessorProbThreshold(
       const MachineBasicBlock *BB) {
-  if (!BB->getParent()->getFunction()->getEntryCount())
+  if (!BB->getParent()->getFunction().getEntryCount())
     return BranchProbability(StaticLikelyProb, 100);
   if (BB->succ_size() == 2) {
     const MachineBasicBlock *Succ1 = *BB->succ_begin();
@@ -1534,10 +1566,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
   // worklist of already placed entries.
   // FIXME: If this shows up on profiles, it could be folded (at the cost of
   // some code complexity) into the loop below.
-  WorkList.erase(remove_if(WorkList,
-                           [&](MachineBasicBlock *BB) {
-                             return BlockToChain.lookup(BB) == &Chain;
-                           }),
+  WorkList.erase(llvm::remove_if(WorkList,
+                                 [&](MachineBasicBlock *BB) {
+                                   return BlockToChain.lookup(BB) == &Chain;
+                                 }),
                  WorkList.end());
 
   if (WorkList.empty())
@@ -1659,7 +1691,7 @@ void MachineBlockPlacement::buildChain(
   const MachineBasicBlock *LoopHeaderBB = HeadBB;
   markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
   MachineBasicBlock *BB = *std::prev(Chain.end());
-  for (;;) {
+  while (true) {
     assert(BB && "null block found at end of chain in loop.");
     assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
     assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
@@ -1737,7 +1769,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
   // i.e. when the layout predecessor does not fallthrough to the loop header.
   // In practice this never happens though: there always seems to be a preheader
   // that can fallthrough and that is also placed before the header.
-  if (F->getFunction()->optForSize())
+  if (F->getFunction().optForSize())
     return L.getHeader();
 
   // Check that the header hasn't been fused with a preheader block due to
@@ -1945,7 +1977,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
     }
   }
 
-  BlockChain::iterator ExitIt = find(LoopChain, ExitingBB);
+  BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
   if (ExitIt == LoopChain.end())
     return;
 
@@ -1999,7 +2031,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
     BlockChain &LoopChain, const MachineLoop &L,
     const BlockFilterSet &LoopBlockSet) {
   auto HeaderBB = L.getHeader();
-  auto HeaderIter = find(LoopChain, HeaderBB);
+  auto HeaderIter = llvm::find(LoopChain, HeaderBB);
   auto RotationPos = LoopChain.end();
 
   BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2146,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
   // will be merged into the first outer loop chain for which this block is not
   // cold anymore. This needs precise profile data and we only do this when
   // profile data is available.
-  if (F->getFunction()->getEntryCount()) {
+  if (F->getFunction().getEntryCount() || ForceLoopColdBlock) {
     BlockFrequency LoopFreq(0);
     for (auto LoopPred : L.getHeader()->predecessors())
       if (!L.contains(LoopPred))
@@ -2188,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   // for better layout.
   bool RotateLoopWithProfile =
       ForcePreciseRotationCost ||
-      (PreciseRotationCost && F->getFunction()->getEntryCount());
+      (PreciseRotationCost && F->getFunction().getEntryCount());
 
   // First check to see if there is an obviously preferable top block for the
   // loop. This will default to the header, but may end up as one of the
@@ -2201,6 +2233,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
   // If we selected just the header for the loop top, look for a potentially
   // profitable exit block in the event that rotating the loop can eliminate
   // branches by placing an exit edge at the bottom.
+  //
+  // Loops are processed innermost to uttermost, make sure we clear
+  // PreferredLoopExit before processing a new loop.
+  PreferredLoopExit = nullptr;
   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
     PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
 
@@ -2272,7 +2308,7 @@ void MachineBlockPlacement::buildCFGChains() {
         new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
     // Also, merge any blocks which we cannot reason about and must preserve
     // the exact fallthrough behavior for.
-    for (;;) {
+    while (true) {
       Cond.clear();
       MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
       if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
@@ -2313,7 +2349,7 @@ void MachineBlockPlacement::buildCFGChains() {
   buildChain(&F->front(), FunctionChain);
 
 #ifndef NDEBUG
-  typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+  using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;
 #endif
   DEBUG({
     // Crash at the end so we get all of the debugging output first.
@@ -2449,7 +2485,7 @@ void MachineBlockPlacement::alignBlocks() {
   // exclusively on the loop info here so that we can align backedges in
   // unnatural CFGs and backedges that were introduced purely because of the
   // loop rotations done during this layout pass.
-  if (F->getFunction()->optForSize())
+  if (F->getFunction().optForSize())
     return;
   BlockChain &FunctionChain = *BlockToChain[&F->front()];
   if (FunctionChain.begin() == FunctionChain.end())
@@ -2545,8 +2581,8 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
   // duplicated from here on are already scheduled.
   // Note that DuplicatedToLPred always implies Removed.
   while (DuplicatedToLPred) {
-    assert (Removed && "Block must have been removed to be duplicated into its "
-            "layout predecessor.");
+    assert(Removed && "Block must have been removed to be duplicated into its "
+           "layout predecessor.");
     MachineBasicBlock *DupBB, *DupPred;
     // The removal callback causes Chain.end() to be updated when a block is
     // removed. On the first pass through the loop, the chain end should be the
@@ -2629,8 +2665,10 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
           if (RemBB->isEHPad())
             RemoveList = EHPadWorkList;
           RemoveList.erase(
-              remove_if(RemoveList,
-                        [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
+              llvm::remove_if(RemoveList,
+                              [RemBB](MachineBasicBlock *BB) {
+                                return BB == RemBB;
+                              }),
               RemoveList.end());
         }
 
@@ -2648,7 +2686,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
               << getBlockName(RemBB) << "\n");
       };
   auto RemovalCallbackRef =
-      llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+      function_ref<void(MachineBasicBlock*)>(RemovalCallback);
 
   SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
   bool IsSimple = TailDup.isSimpleBB(BB);
@@ -2677,7 +2715,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
 }
 
 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   // Check for single-block functions and skip them.
@@ -2722,9 +2760,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
 
   if (TailDupPlacement) {
     MPDT = &getAnalysis<MachinePostDominatorTree>();
-    if (MF.getFunction()->optForSize())
+    if (MF.getFunction().optForSize())
       TailDupSize = 1;
-    TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+    bool PreRegAlloc = false;
+    TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
     precomputeTriangleChains();
   }
 
@@ -2778,7 +2817,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   }
   if (ViewBlockLayoutWithBFI != GVDT_None &&
       (ViewBlockFreqFuncName.empty() ||
-       F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
+       F->getFunction().getName().equals(ViewBlockFreqFuncName))) {
     MBFI->view("MBP." + MF.getName(), false);
   }
 
@@ -2789,6 +2828,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
 }
 
 namespace {
+
 /// \brief A pass to compute block placement statistics.
 ///
 /// A separate pass to compute interesting statistics for evaluating block
@@ -2804,6 +2844,7 @@ class MachineBlockPlacementStats : public MachineFunctionPass {
 
 public:
   static char ID; // Pass identification, replacement for typeid
+
   MachineBlockPlacementStats() : MachineFunctionPass(ID) {
     initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
   }
@@ -2817,10 +2858,13 @@ public:
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
-}
+
+} // end anonymous namespace
 
 char MachineBlockPlacementStats::ID = 0;
+
 char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+
 INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
                       "Basic Block Placement Stats", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 21eff9dfff9c..e4952aaaba06 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -84,7 +84,7 @@ raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
     const MachineBasicBlock *Dst) const {
 
   const BranchProbability Prob = getEdgeProbability(Src, Dst);
-  OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+  OS << "edge " << printMBBReference(*Src) << " -> " << printMBBReference(*Dst)
      << " probability is " << Prob
      << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
 
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 582ff139f886..53c0d840ac84 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -1,4 +1,4 @@
-//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,18 +15,35 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RecyclingAllocator.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "machine-cse"
@@ -40,15 +57,18 @@ STATISTIC(NumCrossBBCSEs,
 STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
 
 namespace {
+
   class MachineCSE : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
     AliasAnalysis *AA;
     MachineDominatorTree *DT;
     MachineRegisterInfo *MRI;
+
   public:
     static char ID; // Pass identification
-    MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) {
+
+    MachineCSE() : MachineFunctionPass(ID) {
       initializeMachineCSEPass(*PassRegistry::getPassRegistry());
     }
 
@@ -69,16 +89,18 @@ namespace {
     }
 
   private:
-    unsigned LookAheadLimit;
-    typedef RecyclingAllocator<BumpPtrAllocator,
-        ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
-    typedef ScopedHashTable<MachineInstr*, unsigned,
-        MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
-    typedef ScopedHTType::ScopeTy ScopeType;
-    DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
+                            ScopedHashTableVal<MachineInstr *, unsigned>>;
+    using ScopedHTType =
+        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
+                        AllocatorTy>;
+    using ScopeType = ScopedHTType::ScopeTy;
+
+    unsigned LookAheadLimit = 0;
+    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
     ScopedHTType VNT;
-    SmallVector<MachineInstr*, 64> Exps;
-    unsigned CurrVN;
+    SmallVector<MachineInstr *, 64> Exps;
+    unsigned CurrVN = 0;
 
     bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                        MachineBasicBlock *MBB);
@@ -104,10 +126,13 @@ namespace {
                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
     bool PerformCSE(MachineDomTreeNode *Node);
   };
+
 } // end anonymous namespace
 
 char MachineCSE::ID = 0;
+
 char &llvm::MachineCSEID = MachineCSE::ID;
+
 INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                       "Machine Common Subexpression Elimination", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -225,8 +250,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
       continue;
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
-    // Reading constant physregs is ok.
-    if (!MRI->isConstantPhysReg(Reg))
+    // Reading either caller preserved or constant physregs is ok.
+    if (!MRI->isCallerPreservedOrConstPhysReg(Reg))
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
         PhysRefs.insert(*AI);
   }
@@ -598,12 +623,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
       // Go through implicit defs of CSMI and MI, and clear the kill flags on
       // their uses in all the instructions between CSMI and MI.
       // We might have made some of the kill flags redundant, consider:
-      //   subs  ... %NZCV<imp-def>        <- CSMI
-      //   csinc ... %NZCV<imp-use,kill>   <- this kill flag isn't valid anymore
-      //   subs  ... %NZCV<imp-def>        <- MI, to be eliminated
-      //   csinc ... %NZCV<imp-use,kill>
+      //   subs  ... implicit-def %nzcv    <- CSMI
+      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
+      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
+      //   csinc ... implicit killed %nzcv
       // Since we eliminated MI, and reused a register imp-def'd by CSMI
-      // (here %NZCV), that register, if it was killed before MI, should have
+      // (here %nzcv), that register, if it was killed before MI, should have
       // that kill flag removed, because it's lifetime was extended.
       if (CSMI->getParent() == MI->getParent()) {
         for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
@@ -702,7 +727,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
 }
 
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   TII = MF.getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index e6f80dbb8630..702d21228477 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -16,17 +16,17 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -34,6 +34,11 @@ using namespace llvm;
 
 STATISTIC(NumInstCombined, "Number of machineinst combined");
 
+static cl::opt<unsigned>
+inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
+              cl::desc("Incremental depth computation will be used for basic "
+                       "blocks with more instructions."), cl::init(500));
+
 namespace {
 class MachineCombiner : public MachineFunctionPass {
   const TargetInstrInfo *TII;
@@ -73,7 +78,7 @@ private:
                           SmallVectorImpl<MachineInstr *> &InsInstrs,
                           SmallVectorImpl<MachineInstr *> &DelInstrs,
                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                          MachineCombinerPattern Pattern);
+                          MachineCombinerPattern Pattern, bool SlackIsAccurate);
   bool preservesResourceLen(MachineBasicBlock *MBB,
                             MachineTraceMetrics::Trace BlockTrace,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -155,9 +160,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
         assert(DefInstr &&
                "There must be a definition for a new virtual register");
         DepthOp = InstrDepth[II->second];
-        LatencyOp = TSchedModel.computeOperandLatency(
-            DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
-            InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+        int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
+        int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
+        LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
+                                                      InstrPtr, UseIdx);
       } else {
         MachineInstr *DefInstr = getOperandDef(MO);
         if (DefInstr) {
@@ -247,7 +253,8 @@ bool MachineCombiner::improvesCriticalPathLen(
     SmallVectorImpl<MachineInstr *> &InsInstrs,
     SmallVectorImpl<MachineInstr *> &DelInstrs,
     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-    MachineCombinerPattern Pattern) {
+    MachineCombinerPattern Pattern,
+    bool SlackIsAccurate) {
   assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
          "Missing machine model\n");
   // NewRoot is the last instruction in the \p InsInstrs vector.
@@ -258,7 +265,7 @@ bool MachineCombiner::improvesCriticalPathLen(
   unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
   unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
 
-  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n";
         dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
         dbgs() << " RootDepth: " << RootDepth << "\n");
 
@@ -274,24 +281,32 @@ bool MachineCombiner::improvesCriticalPathLen(
   // of the original code sequence. This may allow the transform to proceed
   // even if the instruction depths (data dependency cycles) become worse.
 
-  unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
-  unsigned RootLatency = 0;
+  // Account for the latency of the inserted and deleted instructions by
+  // adding up their latencies. This assumes that the inserted and deleted
+  // instructions are dependent instruction chains, which might not hold
+  // in all cases.
+  unsigned NewRootLatency = 0;
+  for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
+    NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
+  NewRootLatency += getLatency(Root, NewRoot, BlockTrace);
 
+  unsigned RootLatency = 0;
   for (auto I : DelInstrs)
     RootLatency += TSchedModel.computeInstrLatency(I);
 
   unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
-
+  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+  unsigned OldCycleCount = RootDepth + RootLatency +
+                           (SlackIsAccurate ? RootSlack : 0);
   DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
         dbgs() << " RootLatency: " << RootLatency << "\n";
-        dbgs() << " RootSlack: " << RootSlack << "\n";
+        dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate="
+               << SlackIsAccurate << "\n";
         dbgs() << " NewRootDepth + NewRootLatency = "
-               << NewRootDepth + NewRootLatency << "\n";
+               << NewCycleCount << "\n";
         dbgs() << " RootDepth + RootLatency + RootSlack = "
-               << RootDepth + RootLatency + RootSlack << "\n";);
-
-  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
-  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+               << OldCycleCount << "\n";
+        );
 
   return NewCycleCount <= OldCycleCount;
 }
@@ -354,17 +369,44 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
   return false;
 }
 
+/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
+/// depths if requested.
+///
+/// \param MBB basic block to insert instructions in
+/// \param MI current machine instruction
+/// \param InsInstrs new instructions to insert in \p MBB
+/// \param DelInstrs instruction to delete from \p MBB
+/// \param MinInstr is a pointer to the machine trace information
+/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param IncrementalUpdate if true, compute instruction depths incrementally,
+///                          otherwise invalidate the trace
 static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
                                      SmallVector<MachineInstr *, 16> InsInstrs,
                                      SmallVector<MachineInstr *, 16> DelInstrs,
-                                     MachineTraceMetrics *Traces) {
+                                     MachineTraceMetrics::Ensemble *MinInstr,
+                                     SparseSet<LiveRegUnit> &RegUnits,
+                                     bool IncrementalUpdate) {
   for (auto *InstrPtr : InsInstrs)
     MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
-  for (auto *InstrPtr : DelInstrs)
+
+  for (auto *InstrPtr : DelInstrs) {
     InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-  ++NumInstCombined;
-  Traces->invalidate(MBB);
-  Traces->verifyAnalysis();
+    // Erase all LiveRegs defined by the removed instruction
+    for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
+      if (I->MI == InstrPtr)
+        I = RegUnits.erase(I);
+      else
+        I++;
+    }
+  }
+
+  if (IncrementalUpdate)
+    for (auto *InstrPtr : InsInstrs)
+      MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
+  else
+    MinInstr->invalidate(MBB);
+
+  NumInstCombined++;
 }
 
 /// Substitute a slow code sequence with a faster one by
@@ -378,9 +420,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
   bool Changed = false;
   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
 
+  bool IncrementalUpdate = false;
   auto BlockIter = MBB->begin();
+  decltype(BlockIter) LastUpdate;
   // Check if the block is in a loop.
   const MachineLoop *ML = MLI->getLoopFor(MBB);
+  if (!MinInstr)
+    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+  SparseSet<LiveRegUnit> RegUnits;
+  RegUnits.setUniverse(TRI->getNumRegUnits());
 
   while (BlockIter != MBB->end()) {
     auto &MI = *BlockIter++;
@@ -419,9 +468,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
       SmallVector<MachineInstr *, 16> InsInstrs;
       SmallVector<MachineInstr *, 16> DelInstrs;
       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
-      if (!MinInstr)
-        MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
-      Traces->verifyAnalysis();
       TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
                                       InstrIdxForVirtReg);
       unsigned NewInstCount = InsInstrs.size();
@@ -436,23 +482,43 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
       if (ML && TII->isThroughputPattern(P))
         SubstituteAlways = true;
 
+      if (IncrementalUpdate) {
+        // Update depths since the last incremental update.
+        MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
+        LastUpdate = BlockIter;
+      }
+
       // Substitute when we optimize for codesize and the new sequence has
       // fewer instructions OR
       // the new sequence neither lengthens the critical path nor increases
       // resource pressure.
       if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
-        insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+        insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+                                 RegUnits, IncrementalUpdate);
         // Eagerly stop after the first pattern fires.
         Changed = true;
         break;
       } else {
-        // Calculating the trace metrics may be expensive,
-        // so only do this when necessary.
+        // For big basic blocks, we only compute the full trace the first time
+        // we hit this. We do not invalidate the trace, but instead update the
+        // instruction depths incrementally.
+        // NOTE: Only the instruction depths up to MI are accurate. All other
+        // trace information is not updated.
         MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+        Traces->verifyAnalysis();
         if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
-                                    InstrIdxForVirtReg, P) &&
+                                    InstrIdxForVirtReg, P,
+                                    !IncrementalUpdate) &&
             preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
-          insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+          if (MBB->size() > inc_threshold) {
+            // Use incremental depth updates for basic blocks above treshold
+            IncrementalUpdate = true;
+            LastUpdate = BlockIter;
+          }
+
+          insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+                                   RegUnits, IncrementalUpdate);
+
           // Eagerly stop after the first pattern fires.
           Changed = true;
           break;
@@ -467,6 +533,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
     }
   }
 
+  if (Changed && IncrementalUpdate)
+    Traces->invalidate(MBB);
   return Changed;
 }
 
@@ -480,7 +548,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
   MLI = &getAnalysis<MachineLoopInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
-  OptSize = MF.getFunction()->optForSize();
+  OptSize = MF.getFunction().optForSize();
 
   DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
   if (!TII->useMachineCombiner()) {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 7d5a68192e6b..fcec05adc732 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -12,19 +12,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "machine-cp"
@@ -32,9 +39,10 @@ using namespace llvm;
 STATISTIC(NumDeletes, "Number of dead copies deleted");
 
 namespace {
-  typedef SmallVector<unsigned, 4> RegList;
-  typedef DenseMap<unsigned, RegList> SourceMap;
-  typedef DenseMap<unsigned, MachineInstr*> Reg2MIMap;
+
+using RegList = SmallVector<unsigned, 4>;
+using SourceMap = DenseMap<unsigned, RegList>;
+using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
 
   class MachineCopyPropagation : public MachineFunctionPass {
     const TargetRegisterInfo *TRI;
@@ -43,6 +51,7 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid
+
     MachineCopyPropagation() : MachineFunctionPass(ID) {
       initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
     }
@@ -67,16 +76,23 @@ namespace {
 
     /// Candidates for deletion.
     SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+
     /// Def -> available copies map.
     Reg2MIMap AvailCopyMap;
+
     /// Def -> copies map.
     Reg2MIMap CopyMap;
+
     /// Src -> Def map
     SourceMap SrcMap;
+
     bool Changed;
   };
-}
+
+} // end anonymous namespace
+
 char MachineCopyPropagation::ID = 0;
+
 char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
 
 INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
@@ -170,6 +186,8 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
 
   // Check that the existing copy uses the correct sub registers.
   MachineInstr &PrevCopy = *CI->second;
+  if (PrevCopy.getOperand(0).isDead())
+    return false;
   if (!isNopCopy(PrevCopy, Src, Def, TRI))
     return false;
 
@@ -207,19 +225,19 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
 
       // The two copies cancel out and the source of the first copy
       // hasn't been overridden, eliminate the second one. e.g.
-      //  %ECX<def> = COPY %EAX
-      //  ... nothing clobbered EAX.
-      //  %EAX<def> = COPY %ECX
+      //  %ecx = COPY %eax
+      //  ... nothing clobbered eax.
+      //  %eax = COPY %ecx
       // =>
-      //  %ECX<def> = COPY %EAX
+      //  %ecx = COPY %eax
       //
       // or
       //
-      //  %ECX<def> = COPY %EAX
-      //  ... nothing clobbered EAX.
-      //  %ECX<def> = COPY %EAX
+      //  %ecx = COPY %eax
+      //  ... nothing clobbered eax.
+      //  %ecx = COPY %eax
       // =>
-      //  %ECX<def> = COPY %EAX
+      //  %ecx = COPY %eax
       if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
         continue;
 
@@ -243,11 +261,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
 
       // If 'Def' is previously source of another copy, then this earlier copy's
       // source is no longer available. e.g.
-      // %xmm9<def> = copy %xmm2
+      // %xmm9 = copy %xmm2
       // ...
-      // %xmm2<def> = copy %xmm0
+      // %xmm2 = copy %xmm0
       // ...
-      // %xmm2<def> = copy %xmm9
+      // %xmm2 = copy %xmm9
       ClobberRegister(Def);
       for (const MachineOperand &MO : MI->implicit_operands()) {
         if (!MO.isReg() || !MO.isDef())
@@ -269,7 +287,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
       // it's no longer available for copy propagation.
       RegList &DestList = SrcMap[Src];
       if (!is_contained(DestList, Def))
-        DestList.push_back(Def);
+          DestList.push_back(Def);
 
       continue;
     }
@@ -360,7 +378,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
 }
 
 bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   Changed = false;
@@ -374,4 +392,3 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
-
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 845e8232477c..517ac29b6450 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -26,7 +26,7 @@ static bool VerifyMachineDomInfo = true;
 static bool VerifyMachineDomInfo = false;
 #endif
 static cl::opt<bool, true> VerifyMachineDomInfoX(
-    "verify-machine-dom-info", cl::location(VerifyMachineDomInfo),
+    "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden,
     cl::desc("Verify machine dominator info (time consuming)"));
 
 namespace llvm {
@@ -148,7 +148,8 @@ void MachineDominatorTree::verifyDomTree() const {
   OtherDT.recalculate(F);
   if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
       DT->compare(OtherDT)) {
-    errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
+    errs() << "MachineDominatorTree for function " << F.getName()
+           << " is not up to date!\nComputed:\n";
     DT->print(errs());
     errs() << "\nActual:\n";
     OtherDT.print(errs());
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index 73d778ff3023..2aa9d6b816c8 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -16,12 +16,12 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 
 #define DEBUG_TYPE "codegen"
@@ -47,11 +47,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
 }
 
 int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
-                      bool isSS, const AllocaInst *Alloca) {
+                                        bool IsSpillSlot,
+                                        const AllocaInst *Alloca,
+                                        uint8_t StackID) {
   assert(Size != 0 && "Cannot allocate zero size stack objects!");
   Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
-  Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
-                                !isSS));
+  Objects.push_back(StackObject(Size, Alignment, 0, false, IsSpillSlot, Alloca,
+                                !IsSpillSlot, StackID));
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   assert(Index >= 0 && "Bad frame index!");
   ensureMaxAlignment(Alignment);
@@ -77,7 +79,7 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
 }
 
 int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
-                                        bool Immutable, bool isAliased) {
+                                        bool IsImmutable, bool IsAliased) {
   assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
   // The alignment of the frame index can be determined from its offset from
   // the incoming frame position.  If the frame object is at offset 32 and
@@ -85,23 +87,24 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
   // object is 16-byte aligned. Note that unlike the non-fixed case, if the
   // stack needs realignment, we can't assume that the stack will in fact be
   // aligned.
-  unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
-  Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
-  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/   false,
-                                              /*Alloca*/ nullptr, isAliased));
+  unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+  Objects.insert(Objects.begin(),
+                 StackObject(Size, Alignment, SPOffset, IsImmutable,
+                             /*isSpillSlot=*/false, /*Alloca=*/nullptr,
+                             IsAliased));
   return -++NumFixedObjects;
 }
 
 int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
                                                   int64_t SPOffset,
-                                                  bool Immutable) {
-  unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
-  Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
-  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/ true,
-                                              /*Alloca*/ nullptr,
-                                              /*isAliased*/ false));
+                                                  bool IsImmutable) {
+  unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+  Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+  Objects.insert(Objects.begin(),
+                 StackObject(Size, Alignment, SPOffset, IsImmutable,
+                             /*IsSpillSlot=*/true, /*Alloca=*/nullptr,
+                             /*IsAliased=*/false));
   return -++NumFixedObjects;
 }
 
@@ -212,6 +215,10 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
   for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
     const StackObject &SO = Objects[i];
     OS << "  fi#" << (int)(i-NumFixedObjects) << ": ";
+
+    if (SO.StackID != 0)
+      OS << "id=" << SO.StackID << ' ';
+
     if (SO.Size == ~0ULL) {
       OS << "dead\n";
       continue;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 742b095d955e..bc8eb1429d92 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -1,4 +1,4 @@
-//===-- MachineFunction.cpp -----------------------------------------------===//
+//===- MachineFunction.cpp ------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,45 +14,76 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Value.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "codegen"
 
 static cl::opt<unsigned>
-    AlignAllFunctions("align-all-functions",
-                      cl::desc("Force the alignment of all functions."),
-                      cl::init(0), cl::Hidden);
+AlignAllFunctions("align-all-functions",
+                  cl::desc("Force the alignment of all functions."),
+                  cl::init(0), cl::Hidden);
 
 static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
-  typedef MachineFunctionProperties::Property P;
+  using P = MachineFunctionProperties::Property;
+
   switch(Prop) {
   case P::FailedISel: return "FailedISel";
   case P::IsSSA: return "IsSSA";
@@ -81,23 +112,23 @@ void MachineFunctionProperties::print(raw_ostream &OS) const {
 //===----------------------------------------------------------------------===//
 
 // Out-of-line virtual method.
-MachineFunctionInfo::~MachineFunctionInfo() {}
+MachineFunctionInfo::~MachineFunctionInfo() = default;
 
 void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
   MBB->getParent()->DeleteMachineBasicBlock(MBB);
 }
 
 static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
-                                           const Function *Fn) {
-  if (Fn->hasFnAttribute(Attribute::StackAlignment))
-    return Fn->getFnStackAlignment();
+                                           const Function &F) {
+  if (F.hasFnAttribute(Attribute::StackAlignment))
+    return F.getFnStackAlignment();
   return STI->getFrameLowering()->getStackAlignment();
 }
 
-MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target,
+                                 const TargetSubtargetInfo &STI,
                                  unsigned FunctionNum, MachineModuleInfo &mmi)
-    : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
-      MMI(mmi) {
+    : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) {
   FunctionNumber = FunctionNum;
   init();
 }
@@ -115,21 +146,21 @@ void MachineFunction::init() {
   // We can realign the stack if the target supports it and the user hasn't
   // explicitly asked us not to.
   bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
-                      !Fn->hasFnAttribute("no-realign-stack");
+                      !F.hasFnAttribute("no-realign-stack");
   FrameInfo = new (Allocator) MachineFrameInfo(
-      getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP,
+      getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
       /*ForceRealign=*/CanRealignSP &&
-          Fn->hasFnAttribute(Attribute::StackAlignment));
+          F.hasFnAttribute(Attribute::StackAlignment));
 
-  if (Fn->hasFnAttribute(Attribute::StackAlignment))
-    FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
+  if (F.hasFnAttribute(Attribute::StackAlignment))
+    FrameInfo->ensureMaxAlignment(F.getFnStackAlignment());
 
   ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
   Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
 
-  // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+  // FIXME: Shouldn't use pref alignment if explicit alignment is set on F.
   // FIXME: Use Function::optForSize().
-  if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
+  if (!F.hasFnAttribute(Attribute::OptimizeForSize))
     Alignment = std::max(Alignment,
                          STI->getTargetLowering()->getPrefFunctionAlignment());
 
@@ -139,7 +170,7 @@ void MachineFunction::init() {
   JumpTableInfo = nullptr;
 
   if (isFuncletEHPersonality(classifyEHPersonality(
-          Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) {
+          F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) {
     WinEHInfo = new (Allocator) WinEHFuncInfo();
   }
 
@@ -147,7 +178,9 @@ void MachineFunction::init() {
          "Can't create a MachineFunction using a Module with a "
          "Target-incompatible DataLayout attached\n");
 
-  PSVManager = llvm::make_unique<PseudoSourceValueManager>();
+  PSVManager =
+    llvm::make_unique<PseudoSourceValueManager>(*(getSubtarget().
+                                                  getInstrInfo()));
 }
 
 MachineFunction::~MachineFunction() {
@@ -166,6 +199,7 @@ void MachineFunction::clear() {
   InstructionRecycler.clear(Allocator);
   OperandRecycler.clear(Allocator);
   BasicBlockRecycler.clear(Allocator);
+  CodeViewAnnotations.clear();
   VariableDbgInfos.clear();
   if (RegInfo) {
     RegInfo->~MachineRegisterInfo();
@@ -194,7 +228,7 @@ void MachineFunction::clear() {
 }
 
 const DataLayout &MachineFunction::getDataLayout() const {
-  return Fn->getParent()->getDataLayout();
+  return F.getParent()->getDataLayout();
 }
 
 /// Get the JumpTableInfo for this function.
@@ -210,7 +244,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
 
 /// Should we be emitting segmented stack stuff for the function
 bool MachineFunction::shouldSplitStack() const {
-  return getFunction()->hasFnAttribute("split-stack");
+  return getFunction().hasFnAttribute("split-stack");
 }
 
 /// This discards all of the MachineBasicBlock numbers and recomputes them.
@@ -270,6 +304,26 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
              MachineInstr(*this, *Orig);
 }
 
+MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) {
+  MachineInstr *FirstClone = nullptr;
+  MachineBasicBlock::const_instr_iterator I = Orig.getIterator();
+  while (true) {
+    MachineInstr *Cloned = CloneMachineInstr(&*I);
+    MBB.insert(InsertBefore, Cloned);
+    if (FirstClone == nullptr) {
+      FirstClone = Cloned;
+    } else {
+      Cloned->bundleWithPred();
+    }
+
+    if (!I->isBundledWithSucc())
+      break;
+    ++I;
+  }
+  return *FirstClone;
+}
+
 /// Delete the given MachineInstr.
 ///
 /// This function also serves as the MachineInstr destructor - the real
@@ -431,8 +485,7 @@ LLVM_DUMP_METHOD void MachineFunction::dump() const {
 #endif
 
 StringRef MachineFunction::getName() const {
-  assert(getFunction() && "No function!");
-  return getFunction()->getName();
+  return getFunction().getName();
 }
 
 void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
@@ -456,17 +509,17 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
     OS << "Function Live Ins: ";
     for (MachineRegisterInfo::livein_iterator
          I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
-      OS << PrintReg(I->first, TRI);
+      OS << printReg(I->first, TRI);
       if (I->second)
-        OS << " in " << PrintReg(I->second, TRI);
+        OS << " in " << printReg(I->second, TRI);
       if (std::next(I) != E)
         OS << ", ";
     }
     OS << '\n';
   }
 
-  ModuleSlotTracker MST(getFunction()->getParent());
-  MST.incorporateFunction(*getFunction());
+  ModuleSlotTracker MST(getFunction().getParent());
+  MST.incorporateFunction(getFunction());
   for (const auto &BB : *this) {
     OS << '\n';
     BB.print(OS, MST, Indexes);
@@ -476,10 +529,10 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
 }
 
 namespace llvm {
+
   template<>
   struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
-
-  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+    DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
     static std::string getGraphName(const MachineFunction *F) {
       return ("CFG for '" + F->getName() + "' function").str();
@@ -492,7 +545,7 @@ namespace llvm {
         raw_string_ostream OSS(OutStr);
 
         if (isSimple()) {
-          OSS << "BB#" << Node->getNumber();
+          OSS << printMBBReference(*Node);
           if (const BasicBlock *BB = Node->getBasicBlock())
             OSS << ": " << BB->getName();
         } else
@@ -510,7 +563,8 @@ namespace llvm {
       return OutStr;
     }
   };
-}
+
+} // end namespace llvm
 
 void MachineFunction::viewCFG() const
 {
@@ -797,7 +851,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
   // alignment.
   switch (getEntryKind()) {
   case MachineJumpTableInfo::EK_BlockAddress:
-    return TD.getPointerABIAlignment();
+    return TD.getPointerABIAlignment(0);
   case MachineJumpTableInfo::EK_GPRel64BlockAddress:
     return TD.getABIIntegerTypeAlignment(64);
   case MachineJumpTableInfo::EK_GPRel32BlockAddress:
@@ -851,9 +905,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
   OS << "Jump Tables:\n";
 
   for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
-    OS << "  jt#" << i << ": ";
+    OS << printJumpTableEntryReference(i) << ": ";
     for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
-      OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+      OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
   }
 
   OS << '\n';
@@ -863,12 +917,15 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
 LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); }
 #endif
 
+Printable llvm::printJumpTableEntryReference(unsigned Idx) {
+  return Printable([Idx](raw_ostream &OS) { OS << "%jump-table." << Idx; });
+}
 
 //===----------------------------------------------------------------------===//
 //  MachineConstantPool implementation
 //===----------------------------------------------------------------------===//
 
-void MachineConstantPoolValue::anchor() { }
+void MachineConstantPoolValue::anchor() {}
 
 Type *MachineConstantPoolEntry::getType() const {
   if (isMachineConstantPoolEntry())
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 535757ed87c1..14655c6eb700 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -33,6 +34,9 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
@@ -58,11 +62,8 @@
 #include "llvm/Support/LowLevelTypeImpl.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -73,695 +74,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> PrintWholeRegMask(
-    "print-whole-regmask",
-    cl::desc("Print the full contents of regmask operands in IR dumps"),
-    cl::init(true), cl::Hidden);
-
-//===----------------------------------------------------------------------===//
-// MachineOperand Implementation
-//===----------------------------------------------------------------------===//
-
-void MachineOperand::setReg(unsigned Reg) {
-  if (getReg() == Reg) return; // No change.
-
-  // Otherwise, we have to change the register.  If this operand is embedded
-  // into a machine function, we need to update the old and new register's
-  // use/def lists.
-  if (MachineInstr *MI = getParent())
-    if (MachineBasicBlock *MBB = MI->getParent())
-      if (MachineFunction *MF = MBB->getParent()) {
-        MachineRegisterInfo &MRI = MF->getRegInfo();
-        MRI.removeRegOperandFromUseList(this);
-        SmallContents.RegNo = Reg;
-        MRI.addRegOperandToUseList(this);
-        return;
-      }
-
-  // Otherwise, just change the register, no problem.  :)
-  SmallContents.RegNo = Reg;
-}
-
-void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
-                                  const TargetRegisterInfo &TRI) {
-  assert(TargetRegisterInfo::isVirtualRegister(Reg));
-  if (SubIdx && getSubReg())
-    SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
-  setReg(Reg);
-  if (SubIdx)
-    setSubReg(SubIdx);
-}
-
-void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
-  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-  if (getSubReg()) {
-    Reg = TRI.getSubReg(Reg, getSubReg());
-    // Note that getSubReg() may return 0 if the sub-register doesn't exist.
-    // That won't happen in legal code.
-    setSubReg(0);
-    if (isDef())
-      setIsUndef(false);
-  }
-  setReg(Reg);
-}
-
-/// Change a def to a use, or a use to a def.
-void MachineOperand::setIsDef(bool Val) {
-  assert(isReg() && "Wrong MachineOperand accessor");
-  assert((!Val || !isDebug()) && "Marking a debug operation as def");
-  if (IsDef == Val)
-    return;
-  // MRI may keep uses and defs in different list positions.
-  if (MachineInstr *MI = getParent())
-    if (MachineBasicBlock *MBB = MI->getParent())
-      if (MachineFunction *MF = MBB->getParent()) {
-        MachineRegisterInfo &MRI = MF->getRegInfo();
-        MRI.removeRegOperandFromUseList(this);
-        IsDef = Val;
-        MRI.addRegOperandToUseList(this);
-        return;
-      }
-  IsDef = Val;
-}
-
-// If this operand is currently a register operand, and if this is in a
-// function, deregister the operand from the register's use/def list.
-void MachineOperand::removeRegFromUses() {
-  if (!isReg() || !isOnRegUseList())
-    return;
-
-  if (MachineInstr *MI = getParent()) {
-    if (MachineBasicBlock *MBB = MI->getParent()) {
-      if (MachineFunction *MF = MBB->getParent())
-        MF->getRegInfo().removeRegOperandFromUseList(this);
-    }
-  }
-}
-
-/// ChangeToImmediate - Replace this operand with a new immediate operand of
-/// the specified value.  If an operand is known to be an immediate already,
-/// the setImm method should be used.
-void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
-  assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
-
-  removeRegFromUses();
-
-  OpKind = MO_Immediate;
-  Contents.ImmVal = ImmVal;
-}
-
-void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
-  assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
-
-  removeRegFromUses();
-
-  OpKind = MO_FPImmediate;
-  Contents.CFP = FPImm;
-}
-
-void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) {
-  assert((!isReg() || !isTied()) &&
-         "Cannot change a tied operand into an external symbol");
-
-  removeRegFromUses();
-
-  OpKind = MO_ExternalSymbol;
-  Contents.OffsetedInfo.Val.SymbolName = SymName;
-  setOffset(0); // Offset is always 0.
-  setTargetFlags(TargetFlags);
-}
-
-void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
-  assert((!isReg() || !isTied()) &&
-         "Cannot change a tied operand into an MCSymbol");
-
-  removeRegFromUses();
-
-  OpKind = MO_MCSymbol;
-  Contents.Sym = Sym;
-}
-
-void MachineOperand::ChangeToFrameIndex(int Idx) {
-  assert((!isReg() || !isTied()) &&
-         "Cannot change a tied operand into a FrameIndex");
-
-  removeRegFromUses();
-
-  OpKind = MO_FrameIndex;
-  setIndex(Idx);
-}
-
-/// ChangeToRegister - Replace this operand with a new register operand of
-/// the specified value.  If an operand is known to be an register already,
-/// the setReg method should be used.
-void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
-                                      bool isKill, bool isDead, bool isUndef,
-                                      bool isDebug) {
-  MachineRegisterInfo *RegInfo = nullptr;
-  if (MachineInstr *MI = getParent())
-    if (MachineBasicBlock *MBB = MI->getParent())
-      if (MachineFunction *MF = MBB->getParent())
-        RegInfo = &MF->getRegInfo();
-  // If this operand is already a register operand, remove it from the
-  // register's use/def lists.
-  bool WasReg = isReg();
-  if (RegInfo && WasReg)
-    RegInfo->removeRegOperandFromUseList(this);
-
-  // Change this to a register and set the reg#.
-  OpKind = MO_Register;
-  SmallContents.RegNo = Reg;
-  SubReg_TargetFlags = 0;
-  IsDef = isDef;
-  IsImp = isImp;
-  IsKill = isKill;
-  IsDead = isDead;
-  IsUndef = isUndef;
-  IsInternalRead = false;
-  IsEarlyClobber = false;
-  IsDebug = isDebug;
-  // Ensure isOnRegUseList() returns false.
-  Contents.Reg.Prev = nullptr;
-  // Preserve the tie when the operand was already a register.
-  if (!WasReg)
-    TiedTo = 0;
-
-  // If this operand is embedded in a function, add the operand to the
-  // register's use/def list.
-  if (RegInfo)
-    RegInfo->addRegOperandToUseList(this);
-}
-
-/// isIdenticalTo - Return true if this operand is identical to the specified
-/// operand. Note that this should stay in sync with the hash_value overload
-/// below.
-bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
-  if (getType() != Other.getType() ||
-      getTargetFlags() != Other.getTargetFlags())
-    return false;
-
-  switch (getType()) {
-  case MachineOperand::MO_Register:
-    return getReg() == Other.getReg() && isDef() == Other.isDef() &&
-           getSubReg() == Other.getSubReg();
-  case MachineOperand::MO_Immediate:
-    return getImm() == Other.getImm();
-  case MachineOperand::MO_CImmediate:
-    return getCImm() == Other.getCImm();
-  case MachineOperand::MO_FPImmediate:
-    return getFPImm() == Other.getFPImm();
-  case MachineOperand::MO_MachineBasicBlock:
-    return getMBB() == Other.getMBB();
-  case MachineOperand::MO_FrameIndex:
-    return getIndex() == Other.getIndex();
-  case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_TargetIndex:
-    return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
-  case MachineOperand::MO_JumpTableIndex:
-    return getIndex() == Other.getIndex();
-  case MachineOperand::MO_GlobalAddress:
-    return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
-  case MachineOperand::MO_ExternalSymbol:
-    return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
-           getOffset() == Other.getOffset();
-  case MachineOperand::MO_BlockAddress:
-    return getBlockAddress() == Other.getBlockAddress() &&
-           getOffset() == Other.getOffset();
-  case MachineOperand::MO_RegisterMask:
-  case MachineOperand::MO_RegisterLiveOut: {
-    // Shallow compare of the two RegMasks
-    const uint32_t *RegMask = getRegMask();
-    const uint32_t *OtherRegMask = Other.getRegMask();
-    if (RegMask == OtherRegMask)
-      return true;
-
-    // Calculate the size of the RegMask
-    const MachineFunction *MF = getParent()->getParent()->getParent();
-    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-    unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
-
-    // Deep compare of the two RegMasks
-    return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
-  }
-  case MachineOperand::MO_MCSymbol:
-    return getMCSymbol() == Other.getMCSymbol();
-  case MachineOperand::MO_CFIIndex:
-    return getCFIIndex() == Other.getCFIIndex();
-  case MachineOperand::MO_Metadata:
-    return getMetadata() == Other.getMetadata();
-  case MachineOperand::MO_IntrinsicID:
-    return getIntrinsicID() == Other.getIntrinsicID();
-  case MachineOperand::MO_Predicate:
-    return getPredicate() == Other.getPredicate();
-  }
-  llvm_unreachable("Invalid machine operand type");
-}
-
-// Note: this must stay exactly in sync with isIdenticalTo above.
-hash_code llvm::hash_value(const MachineOperand &MO) {
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:
-    // Register operands don't have target flags.
-    return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
-  case MachineOperand::MO_Immediate:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
-  case MachineOperand::MO_CImmediate:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
-  case MachineOperand::MO_FPImmediate:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
-  case MachineOperand::MO_MachineBasicBlock:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
-  case MachineOperand::MO_FrameIndex:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
-  case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_TargetIndex:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
-                        MO.getOffset());
-  case MachineOperand::MO_JumpTableIndex:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
-  case MachineOperand::MO_ExternalSymbol:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
-                        MO.getSymbolName());
-  case MachineOperand::MO_GlobalAddress:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
-                        MO.getOffset());
-  case MachineOperand::MO_BlockAddress:
-    return hash_combine(MO.getType(), MO.getTargetFlags(),
-                        MO.getBlockAddress(), MO.getOffset());
-  case MachineOperand::MO_RegisterMask:
-  case MachineOperand::MO_RegisterLiveOut:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
-  case MachineOperand::MO_Metadata:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
-  case MachineOperand::MO_MCSymbol:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
-  case MachineOperand::MO_CFIIndex:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
-  case MachineOperand::MO_IntrinsicID:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
-  case MachineOperand::MO_Predicate:
-    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
-  }
-  llvm_unreachable("Invalid machine operand type");
-}
-
-void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
-                           const TargetIntrinsicInfo *IntrinsicInfo) const {
-  ModuleSlotTracker DummyMST(nullptr);
-  print(OS, DummyMST, TRI, IntrinsicInfo);
-}
-
-void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
-                           const TargetRegisterInfo *TRI,
-                           const TargetIntrinsicInfo *IntrinsicInfo) const {
-  switch (getType()) {
-  case MachineOperand::MO_Register:
-    OS << PrintReg(getReg(), TRI, getSubReg());
-
-    if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
-        isInternalRead() || isEarlyClobber() || isTied()) {
-      OS << '<';
-      bool NeedComma = false;
-      if (isDef()) {
-        if (NeedComma) OS << ',';
-        if (isEarlyClobber())
-          OS << "earlyclobber,";
-        if (isImplicit())
-          OS << "imp-";
-        OS << "def";
-        NeedComma = true;
-        // <def,read-undef> only makes sense when getSubReg() is set.
-        // Don't clutter the output otherwise.
-        if (isUndef() && getSubReg())
-          OS << ",read-undef";
-      } else if (isImplicit()) {
-        OS << "imp-use";
-        NeedComma = true;
-      }
-
-      if (isKill()) {
-        if (NeedComma) OS << ',';
-        OS << "kill";
-        NeedComma = true;
-      }
-      if (isDead()) {
-        if (NeedComma) OS << ',';
-        OS << "dead";
-        NeedComma = true;
-      }
-      if (isUndef() && isUse()) {
-        if (NeedComma) OS << ',';
-        OS << "undef";
-        NeedComma = true;
-      }
-      if (isInternalRead()) {
-        if (NeedComma) OS << ',';
-        OS << "internal";
-        NeedComma = true;
-      }
-      if (isTied()) {
-        if (NeedComma) OS << ',';
-        OS << "tied";
-        if (TiedTo != 15)
-          OS << unsigned(TiedTo - 1);
-      }
-      OS << '>';
-    }
-    break;
-  case MachineOperand::MO_Immediate:
-    OS << getImm();
-    break;
-  case MachineOperand::MO_CImmediate:
-    getCImm()->getValue().print(OS, false);
-    break;
-  case MachineOperand::MO_FPImmediate:
-    if (getFPImm()->getType()->isFloatTy()) {
-      OS << getFPImm()->getValueAPF().convertToFloat();
-    } else if (getFPImm()->getType()->isHalfTy()) {
-      APFloat APF = getFPImm()->getValueAPF();
-      bool Unused;
-      APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
-      OS << "half " << APF.convertToFloat();
-    } else if (getFPImm()->getType()->isFP128Ty()) {
-      APFloat APF = getFPImm()->getValueAPF();
-      SmallString<16> Str;
-      getFPImm()->getValueAPF().toString(Str);
-      OS << "quad " << Str;
-    } else if (getFPImm()->getType()->isX86_FP80Ty()) {
-      APFloat APF = getFPImm()->getValueAPF();
-      OS << "x86_fp80 0xK";
-      APInt API = APF.bitcastToAPInt();
-      OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
-                                 /*Upper=*/true);
-      OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
-                                 /*Upper=*/true);
-    } else {
-      OS << getFPImm()->getValueAPF().convertToDouble();
-    }
-    break;
-  case MachineOperand::MO_MachineBasicBlock:
-    OS << "<BB#" << getMBB()->getNumber() << ">";
-    break;
-  case MachineOperand::MO_FrameIndex:
-    OS << "<fi#" << getIndex() << '>';
-    break;
-  case MachineOperand::MO_ConstantPoolIndex:
-    OS << "<cp#" << getIndex();
-    if (getOffset()) OS << "+" << getOffset();
-    OS << '>';
-    break;
-  case MachineOperand::MO_TargetIndex:
-    OS << "<ti#" << getIndex();
-    if (getOffset()) OS << "+" << getOffset();
-    OS << '>';
-    break;
-  case MachineOperand::MO_JumpTableIndex:
-    OS << "<jt#" << getIndex() << '>';
-    break;
-  case MachineOperand::MO_GlobalAddress:
-    OS << "<ga:";
-    getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
-    if (getOffset()) OS << "+" << getOffset();
-    OS << '>';
-    break;
-  case MachineOperand::MO_ExternalSymbol:
-    OS << "<es:" << getSymbolName();
-    if (getOffset()) OS << "+" << getOffset();
-    OS << '>';
-    break;
-  case MachineOperand::MO_BlockAddress:
-    OS << '<';
-    getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
-    if (getOffset()) OS << "+" << getOffset();
-    OS << '>';
-    break;
-  case MachineOperand::MO_RegisterMask: {
-    unsigned NumRegsInMask = 0;
-    unsigned NumRegsEmitted = 0;
-    OS << "<regmask";
-    for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
-      unsigned MaskWord = i / 32;
-      unsigned MaskBit = i % 32;
-      if (getRegMask()[MaskWord] & (1 << MaskBit)) {
-        if (PrintWholeRegMask || NumRegsEmitted <= 10) {
-          OS << " " << PrintReg(i, TRI);
-          NumRegsEmitted++;
-        }
-        NumRegsInMask++;
-      }
-    }
-    if (NumRegsEmitted != NumRegsInMask)
-      OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
-    OS << ">";
-    break;
-  }
-  case MachineOperand::MO_RegisterLiveOut:
-    OS << "<regliveout>";
-    break;
-  case MachineOperand::MO_Metadata:
-    OS << '<';
-    getMetadata()->printAsOperand(OS, MST);
-    OS << '>';
-    break;
-  case MachineOperand::MO_MCSymbol:
-    OS << "<MCSym=" << *getMCSymbol() << '>';
-    break;
-  case MachineOperand::MO_CFIIndex:
-    OS << "<call frame instruction>";
-    break;
-  case MachineOperand::MO_IntrinsicID: {
-    Intrinsic::ID ID = getIntrinsicID();
-    if (ID < Intrinsic::num_intrinsics)
-      OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
-    else if (IntrinsicInfo)
-      OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
-    else
-      OS << "<intrinsic:" << ID << '>';
-    break;
-  }
-  case MachineOperand::MO_Predicate: {
-    auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
-    OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
-       << CmpInst::getPredicateName(Pred) << '>';
-    break;
-  }
-  }
-  if (unsigned TF = getTargetFlags())
-    OS << "[TF=" << TF << ']';
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MachineOperand::dump() const {
-  dbgs() << *this << '\n';
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// MachineMemOperand Implementation
-//===----------------------------------------------------------------------===//
-
-/// getAddrSpace - Return the LLVM IR address space number that this pointer
-/// points into.
-unsigned MachinePointerInfo::getAddrSpace() const {
-  if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0;
-  return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
-}
-
-/// isDereferenceable - Return true if V is always dereferenceable for 
-/// Offset + Size byte.
-bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
-                                           const DataLayout &DL) const {
-  if (!V.is<const Value*>())
-    return false;
-
-  const Value *BasePtr = V.get<const Value*>();
-  if (BasePtr == nullptr)
-    return false;
-
-  return isDereferenceableAndAlignedPointer(
-      BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
-}
-
-/// getConstantPool - Return a MachinePointerInfo record that refers to the
-/// constant pool.
-MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
-  return MachinePointerInfo(MF.getPSVManager().getConstantPool());
-}
-
-/// getFixedStack - Return a MachinePointerInfo record that refers to the
-/// the specified FrameIndex.
-MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
-                                                     int FI, int64_t Offset) {
-  return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
-}
-
-MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
-  return MachinePointerInfo(MF.getPSVManager().getJumpTable());
-}
-
-MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
-  return MachinePointerInfo(MF.getPSVManager().getGOT());
-}
-
-MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
-                                                int64_t Offset) {
-  return MachinePointerInfo(MF.getPSVManager().getStack(), Offset);
-}
-
-MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
-                                     uint64_t s, unsigned int a,
-                                     const AAMDNodes &AAInfo,
-                                     const MDNode *Ranges,
-                                     SyncScope::ID SSID,
-                                     AtomicOrdering Ordering,
-                                     AtomicOrdering FailureOrdering)
-    : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
-      AAInfo(AAInfo), Ranges(Ranges) {
-  assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
-          isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
-         "invalid pointer value");
-  assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
-  assert((isLoad() || isStore()) && "Not a load/store!");
-
-  AtomicInfo.SSID = static_cast<unsigned>(SSID);
-  assert(getSyncScopeID() == SSID && "Value truncated");
-  AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
-  assert(getOrdering() == Ordering && "Value truncated");
-  AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
-  assert(getFailureOrdering() == FailureOrdering && "Value truncated");
-}
-
-/// Profile - Gather unique data for the object.
-///
-void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
-  ID.AddInteger(getOffset());
-  ID.AddInteger(Size);
-  ID.AddPointer(getOpaqueValue());
-  ID.AddInteger(getFlags());
-  ID.AddInteger(getBaseAlignment());
-}
-
-void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
-  // The Value and Offset may differ due to CSE. But the flags and size
-  // should be the same.
-  assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
-  assert(MMO->getSize() == getSize() && "Size mismatch!");
-
-  if (MMO->getBaseAlignment() >= getBaseAlignment()) {
-    // Update the alignment value.
-    BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
-    // Also update the base and offset, because the new alignment may
-    // not be applicable with the old ones.
-    PtrInfo = MMO->PtrInfo;
-  }
-}
-
-/// getAlignment - Return the minimum known alignment in bytes of the
-/// actual memory reference.
-uint64_t MachineMemOperand::getAlignment() const {
-  return MinAlign(getBaseAlignment(), getOffset());
-}
-
-void MachineMemOperand::print(raw_ostream &OS) const {
-  ModuleSlotTracker DummyMST(nullptr);
-  print(OS, DummyMST);
-}
-void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
-  assert((isLoad() || isStore()) &&
-         "SV has to be a load, store or both.");
-
-  if (isVolatile())
-    OS << "Volatile ";
-
-  if (isLoad())
-    OS << "LD";
-  if (isStore())
-    OS << "ST";
-  OS << getSize();
-
-  // Print the address information.
-  OS << "[";
-  if (const Value *V = getValue())
-    V->printAsOperand(OS, /*PrintType=*/false, MST);
-  else if (const PseudoSourceValue *PSV = getPseudoValue())
-    PSV->printCustom(OS);
-  else
-    OS << "<unknown>";
-
-  unsigned AS = getAddrSpace();
-  if (AS != 0)
-    OS << "(addrspace=" << AS << ')';
-
-  // If the alignment of the memory reference itself differs from the alignment
-  // of the base pointer, print the base alignment explicitly, next to the base
-  // pointer.
-  if (getBaseAlignment() != getAlignment())
-    OS << "(align=" << getBaseAlignment() << ")";
-
-  if (getOffset() != 0)
-    OS << "+" << getOffset();
-  OS << "]";
-
-  // Print the alignment of the reference.
-  if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
-    OS << "(align=" << getAlignment() << ")";
-
-  // Print TBAA info.
-  if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
-    OS << "(tbaa=";
-    if (TBAAInfo->getNumOperands() > 0)
-      TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
-    else
-      OS << "<unknown>";
-    OS << ")";
-  }
-
-  // Print AA scope info.
-  if (const MDNode *ScopeInfo = getAAInfo().Scope) {
-    OS << "(alias.scope=";
-    if (ScopeInfo->getNumOperands() > 0)
-      for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
-        ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
-        if (i != ie-1)
-          OS << ",";
-      }
-    else
-      OS << "<unknown>";
-    OS << ")";
-  }
-
-  // Print AA noalias scope info.
-  if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
-    OS << "(noalias=";
-    if (NoAliasInfo->getNumOperands() > 0)
-      for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
-        NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
-        if (i != ie-1)
-          OS << ",";
-      }
-    else
-      OS << "<unknown>";
-    OS << ")";
-  }
-
-  if (isNonTemporal())
-    OS << "(nontemporal)";
-  if (isDereferenceable())
-    OS << "(dereferenceable)";
-  if (isInvariant())
-    OS << "(invariant)";
-  if (getFlags() & MOTargetFlag1)
-    OS << "(flag1)";
-  if (getFlags() & MOTargetFlag2)
-    OS << "(flag2)";
-  if (getFlags() & MOTargetFlag3)
-    OS << "(flag3)";
-}
-
-//===----------------------------------------------------------------------===//
-// MachineInstr Implementation
-//===----------------------------------------------------------------------===//
-
 void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
   if (MCID->ImplicitDefs)
     for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
@@ -1034,7 +346,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
   if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
     return std::make_pair(nullptr, 0);
 
-  MachineFunction *MF = getParent()->getParent();
+  MachineFunction *MF = getMF();
   mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
   mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
                                   MemBegin);
@@ -1108,9 +420,9 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
       if (Check == IgnoreDefs)
         continue;
       else if (Check == IgnoreVRegDefs) {
-        if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
-            TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
-          if (MO.getReg() != OMO.getReg())
+        if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) ||
+            !TargetRegisterInfo::isVirtualRegister(OMO.getReg()))
+          if (!MO.isIdenticalTo(OMO))
             return false;
       } else {
         if (!MO.isIdenticalTo(OMO))
@@ -1133,6 +445,10 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
   return true;
 }
 
+const MachineFunction *MachineInstr::getMF() const {
+  return getParent()->getParent();
+}
+
 MachineInstr *MachineInstr::removeFromParent() {
   assert(getParent() && "Not embedded in a basic block!");
   return getParent()->remove(this);
@@ -1282,8 +598,8 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
                                     const TargetInstrInfo *TII,
                                     const TargetRegisterInfo *TRI) const {
   assert(getParent() && "Can't have an MBB reference here!");
-  assert(getParent()->getParent() && "Can't have an MF reference here!");
-  const MachineFunction &MF = *getParent()->getParent();
+  assert(getMF() && "Can't have an MF reference here!");
+  const MachineFunction &MF = *getMF();
 
   // Most opcodes have fixed constraints in their MCInstrDesc.
   if (!isInlineAsm())
@@ -1427,7 +743,7 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg,
     if (MO.isUse())
       Use |= !MO.isUndef();
     else if (MO.getSubReg() && !MO.isUndef())
-      // A partial <def,undef> doesn't count as reading the register.
+      // A partial def undef doesn't count as reading the register.
       PartDef = true;
     else
       FullDef = true;
@@ -1619,7 +935,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
   // Treat volatile loads as stores. This is not strictly necessary for
   // volatiles, but it is required for atomic loads. It is not allowed to move
   // a load across an atomic load with Ordering > Monotonic.
-  if (mayStore() || isCall() ||
+  if (mayStore() || isCall() || isPHI() ||
       (mayLoad() && hasOrderedMemoryRef())) {
     SawStore = true;
     return false;
@@ -1644,8 +960,9 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
 
 bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
                             bool UseTBAA) {
-  const MachineFunction *MF = getParent()->getParent();
+  const MachineFunction *MF = getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
 
   // If neither instruction stores to memory, they can't alias in any
   // meaningful way, even if they read from the same address.
@@ -1656,9 +973,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
   if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
     return false;
 
-  if (!AA)
-    return true;
-
   // FIXME: Need to handle multiple memory operands to support all targets.
   if (!hasOneMemOperand() || !Other.hasOneMemOperand())
     return true;
@@ -1666,9 +980,6 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
   MachineMemOperand *MMOa = *memoperands_begin();
   MachineMemOperand *MMOb = *Other.memoperands_begin();
 
-  if (!MMOa->getValue() || !MMOb->getValue())
-    return true;
-
   // The following interface to AA is fashioned after DAGCombiner::isAlias
   // and operates with MachineMemOperand offset with some important
   // assumptions:
@@ -1681,22 +992,53 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
   //   - There should never be any negative offsets here.
   //
   // FIXME: Modify API to hide this math from "user"
-  // FIXME: Even before we go to AA we can reason locally about some
+  // Even before we go to AA we can reason locally about some
   // memory objects. It can save compile time, and possibly catch some
   // corner cases not currently covered.
 
-  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
-  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+  int64_t OffsetA = MMOa->getOffset();
+  int64_t OffsetB = MMOb->getOffset();
+
+  int64_t MinOffset = std::min(OffsetA, OffsetB);
+  int64_t WidthA = MMOa->getSize();
+  int64_t WidthB = MMOb->getSize();
+  const Value *ValA = MMOa->getValue();
+  const Value *ValB = MMOb->getValue();
+  bool SameVal = (ValA && ValB && (ValA == ValB));
+  if (!SameVal) {
+    const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+    const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+    if (PSVa && ValB && !PSVa->mayAlias(&MFI))
+      return false;
+    if (PSVb && ValA && !PSVb->mayAlias(&MFI))
+      return false;
+    if (PSVa && PSVb && (PSVa == PSVb))
+      SameVal = true;
+  }
 
-  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
-  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
-  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+  if (SameVal) {
+    int64_t MaxOffset = std::max(OffsetA, OffsetB);
+    int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+    return (MinOffset + LowWidth > MaxOffset);
+  }
+
+  if (!AA)
+    return true;
+
+  if (!ValA || !ValB)
+    return true;
 
-  AliasResult AAResult =
-      AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
-                               UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
-                MemoryLocation(MMOb->getValue(), Overlapb,
-                               UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+  assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
+  assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+
+  int64_t Overlapa = WidthA + OffsetA - MinOffset;
+  int64_t Overlapb = WidthB + OffsetB - MinOffset;
+
+  AliasResult AAResult = AA->alias(
+      MemoryLocation(ValA, Overlapa,
+                     UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+      MemoryLocation(ValB, Overlapb,
+                     UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
 
   return (AAResult != NoAlias);
 }
@@ -1822,6 +1164,41 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
   }
 }
 
+bool MachineInstr::hasComplexRegisterTies() const {
+  const MCInstrDesc &MCID = getDesc();
+  for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
+    const auto &Operand = getOperand(I);
+    if (!Operand.isReg() || Operand.isDef())
+      // Ignore the defined registers as MCID marks only the uses as tied.
+      continue;
+    int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+    int TiedIdx = Operand.isTied() ? int(findTiedOperandIdx(I)) : -1;
+    if (ExpectedTiedIdx != TiedIdx)
+      return true;
+  }
+  return false;
+}
+
+LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
+                                 const MachineRegisterInfo &MRI) const {
+  const MachineOperand &Op = getOperand(OpIdx);
+  if (!Op.isReg())
+    return LLT{};
+
+  if (isVariadic() || OpIdx >= getNumExplicitOperands())
+    return MRI.getType(Op.getReg());
+
+  auto &OpInfo = getDesc().OpInfo[OpIdx];
+  if (!OpInfo.isGenericType())
+    return MRI.getType(Op.getReg());
+
+  if (PrintedTypes[OpInfo.getGenericTypeIndex()])
+    return LLT{};
+
+  PrintedTypes.set(OpInfo.getGenericTypeIndex());
+  return MRI.getType(Op.getReg());
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void MachineInstr::dump() const {
   dbgs() << "  ";
@@ -1834,7 +1211,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc,
   const Module *M = nullptr;
   if (const MachineBasicBlock *MBB = getParent())
     if (const MachineFunction *MF = MBB->getParent())
-      M = MF->getFunction()->getParent();
+      M = MF->getFunction().getParent();
 
   ModuleSlotTracker MST(M);
   print(OS, MST, SkipOpers, SkipDebugLoc, TII);
@@ -1863,21 +1240,31 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
   // Save a list of virtual registers.
   SmallVector<unsigned, 8> VirtRegs;
 
+  SmallBitVector PrintedTypes(8);
+  bool ShouldPrintRegisterTies = hasComplexRegisterTies();
+  auto getTiedOperandIdx = [&](unsigned OpIdx) {
+    if (!ShouldPrintRegisterTies)
+      return 0U;
+    const MachineOperand &MO = getOperand(OpIdx);
+    if (MO.isReg() && MO.isTied() && !MO.isDef())
+      return findTiedOperandIdx(OpIdx);
+    return 0U;
+  };
   // Print explicitly defined operands on the left of an assignment syntax.
   unsigned StartOp = 0, e = getNumOperands();
   for (; StartOp < e && getOperand(StartOp).isReg() &&
-         getOperand(StartOp).isDef() &&
-         !getOperand(StartOp).isImplicit();
+         getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit();
        ++StartOp) {
-    if (StartOp != 0) OS << ", ";
-    getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo);
+    if (StartOp != 0)
+      OS << ", ";
+    LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
+    unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
+    getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false,
+                              ShouldPrintRegisterTies, TiedOperandIdx, TRI,
+                              IntrinsicInfo);
     unsigned Reg = getOperand(StartOp).getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
       VirtRegs.push_back(Reg);
-      LLT Ty = MRI ? MRI->getType(Reg) : LLT{};
-      if (Ty.isValid())
-        OS << '(' << Ty << ')';
-    }
   }
 
   if (StartOp != 0)
@@ -1900,7 +1287,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
   if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
     // Print asm string.
     OS << " ";
-    getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI);
+    const unsigned OpIdx = InlineAsm::MIOp_AsmString;
+    LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
+    unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
+    getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+                            ShouldPrintRegisterTies, TiedOperandIdx, TRI,
+                            IntrinsicInfo);
 
     // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
     unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1943,8 +1335,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
       auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
       if (DIV && !DIV->getName().empty())
         OS << "!\"" << DIV->getName() << '\"';
-      else
-        MO.print(OS, MST, TRI);
+      else {
+        LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+        unsigned TiedOperandIdx = getTiedOperandIdx(i);
+        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+                 ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+      }
     } else if (TRI && (isInsertSubreg() || isRegSequence() ||
                        (isSubregToReg() && i == 3)) && MO.isImm()) {
       OS << TRI->getSubRegIndexName(MO.getImm());
@@ -2006,8 +1402,15 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
 
       // Compute the index of the next operand descriptor.
       AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
-    } else
-      MO.print(OS, MST, TRI);
+    } else {
+      LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
+      unsigned TiedOperandIdx = getTiedOperandIdx(i);
+      if (MO.isImm() && isOperandSubregIdx(i))
+        MachineOperand::printSubregIdx(OS, MO.getImm(), TRI);
+      else
+        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
+                 ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
+    }
   }
 
   bool HaveSemi = false;
@@ -2057,14 +1460,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
       else
         OS << " "
            << TRI->getRegClassName(RC.get<const TargetRegisterClass *>());
-      OS << ':' << PrintReg(VirtRegs[i]);
+      OS << ':' << printReg(VirtRegs[i]);
       for (unsigned j = i+1; j != VirtRegs.size();) {
         if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) {
           ++j;
           continue;
         }
         if (VirtRegs[i] != VirtRegs[j])
-          OS << "," << PrintReg(VirtRegs[j]);
+          OS << "," << printReg(VirtRegs[j]);
         VirtRegs.erase(VirtRegs.begin()+j);
       }
     }
@@ -2328,8 +1731,8 @@ void MachineInstr::emitError(StringRef Msg) const {
 
 MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
                                   const MCInstrDesc &MCID, bool IsIndirect,
-                                  unsigned Reg, unsigned Offset,
-                                  const MDNode *Variable, const MDNode *Expr) {
+                                  unsigned Reg, const MDNode *Variable,
+                                  const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
   assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
@@ -2337,53 +1740,60 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
   if (IsIndirect)
     return BuildMI(MF, DL, MCID)
         .addReg(Reg, RegState::Debug)
-        .addImm(Offset)
+        .addImm(0U)
         .addMetadata(Variable)
         .addMetadata(Expr);
-  else {
-    assert(Offset == 0 && "A direct address cannot have an offset.");
+  else
     return BuildMI(MF, DL, MCID)
         .addReg(Reg, RegState::Debug)
         .addReg(0U, RegState::Debug)
         .addMetadata(Variable)
         .addMetadata(Expr);
-  }
 }
 
 MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, const MCInstrDesc &MCID,
                                   bool IsIndirect, unsigned Reg,
-                                  unsigned Offset, const MDNode *Variable,
-                                  const MDNode *Expr) {
+                                  const MDNode *Variable, const MDNode *Expr) {
   assert(isa<DILocalVariable>(Variable) && "not a variable");
   assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
   MachineFunction &MF = *BB.getParent();
-  MachineInstr *MI =
-      BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
+  MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);
   BB.insert(I, MI);
   return MachineInstrBuilder(MF, MI);
 }
 
+/// Compute the new DIExpression to use with a DBG_VALUE for a spill slot.
+/// This prepends DW_OP_deref when spilling an indirect DBG_VALUE.
+static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
+  assert(MI.getOperand(0).isReg() && "can't spill non-register");
+  assert(MI.getDebugVariable()->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+         "Expected inlined-at fields to agree");
+
+  const DIExpression *Expr = MI.getDebugExpression();
+  if (MI.isIndirectDebugValue()) {
+    assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+    Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+  }
+  return Expr;
+}
+
 MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
                                           MachineBasicBlock::iterator I,
                                           const MachineInstr &Orig,
                                           int FrameIndex) {
-  const MDNode *Var = Orig.getDebugVariable();
-  const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
-  bool IsIndirect = Orig.isIndirectDebugValue();
-  uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0;
-  DebugLoc DL = Orig.getDebugLoc();
-  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
-         "Expected inlined-at fields to agree");
-  // If the DBG_VALUE already was a memory location, add an extra
-  // DW_OP_deref. Otherwise just turning this from a register into a
-  // memory/indirect location is sufficient.
-  if (IsIndirect)
-    Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
-  return BuildMI(BB, I, DL, Orig.getDesc())
+  const DIExpression *Expr = computeExprForSpill(Orig);
+  return BuildMI(BB, I, Orig.getDebugLoc(), Orig.getDesc())
       .addFrameIndex(FrameIndex)
-      .addImm(Offset)
-      .addMetadata(Var)
+      .addImm(0U)
+      .addMetadata(Orig.getDebugVariable())
       .addMetadata(Expr);
 }
+
+void llvm::updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex) {
+  const DIExpression *Expr = computeExprForSpill(Orig);
+  Orig.getOperand(0).ChangeToFrameIndex(FrameIndex);
+  Orig.getOperand(1).ChangeToImmediate(0U);
+  Orig.getOperand(3).setMetadata(Expr);
+}
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index b5621a09c6ff..ed16a2b6084c 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -13,10 +13,10 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <utility>
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index c7113f1fdc47..75d449c7ac6f 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -1,4 +1,4 @@
-//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,26 +16,42 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "machinelicm"
@@ -68,6 +84,7 @@ STATISTIC(NumPostRAHoisted,
           "Number of machine instructions hoisted out of loops post regalloc");
 
 namespace {
+
   class MachineLICM : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetLoweringBase *TLI;
@@ -75,7 +92,7 @@ namespace {
     const MachineFrameInfo *MFI;
     MachineRegisterInfo *MRI;
     TargetSchedModel SchedModel;
-    bool PreRegAlloc;
+    bool PreRegAlloc = true;
 
     // Various analyses that we use...
     AliasAnalysis        *AA;      // Alias analysis info.
@@ -89,7 +106,7 @@ namespace {
     MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
 
     // Exit blocks for CurLoop.
-    SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+    SmallVector<MachineBasicBlock *, 8> ExitBlocks;
 
     bool isExitBlock(const MachineBasicBlock *MBB) const {
       return is_contained(ExitBlocks, MBB);
@@ -107,7 +124,7 @@ namespace {
     SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
 
     // For each opcode, keep a list of potential CSE instructions.
-    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+    DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap;
 
     enum {
       SpeculateFalse   = 0,
@@ -122,15 +139,15 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    MachineLICM() :
-      MachineFunctionPass(ID), PreRegAlloc(true) {
-        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
-      }
 
-    explicit MachineLICM(bool PreRA) :
-      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+    MachineLICM() : MachineFunctionPass(ID) {
+      initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+    }
+
+    explicit MachineLICM(bool PreRA)
+        : MachineFunctionPass(ID), PreRegAlloc(PreRA) {
         initializeMachineLICMPass(*PassRegistry::getPassRegistry());
-      }
+    }
 
     bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -157,6 +174,7 @@ namespace {
       MachineInstr *MI;
       unsigned      Def;
       int           FI;
+
       CandidateInfo(MachineInstr *mi, unsigned def, int fi)
         : MI(mi), Def(def), FI(fi) {}
     };
@@ -233,10 +251,13 @@ namespace {
 
     MachineBasicBlock *getCurPreheader();
   };
+
 } // end anonymous namespace
 
 char MachineLICM::ID = 0;
+
 char &llvm::MachineLICMID = MachineLICM::ID;
+
 INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
                       "Machine Loop Invariant Code Motion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -259,7 +280,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
 }
 
 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   Changed = FirstInLoop = false;
@@ -425,7 +446,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
   // Only consider reloads for now and remats which do not have register
   // operands. FIXME: Consider unfold load folding instructions.
   if (Def && !RuledOut) {
-    int FI = INT_MIN;
+    int FI = std::numeric_limits<int>::min();
     if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
         (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
       Candidates.push_back(CandidateInfo(MI, Def, FI));
@@ -492,7 +513,7 @@ void MachineLICM::HoistRegionPostRA() {
   //    registers read by the terminator. Similarly its def should not be
   //    clobbered by the terminator.
   for (CandidateInfo &Candidate : Candidates) {
-    if (Candidate.FI != INT_MIN &&
+    if (Candidate.FI != std::numeric_limits<int>::min() &&
         StoredFIs.count(Candidate.FI))
       continue;
 
@@ -542,8 +563,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
 
   // Now move the instructions to the predecessor, inserting it before any
   // terminator instructions.
-  DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
-               << MI->getParent()->getNumber() << ": " << *MI);
+  DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from "
+               << printMBBReference(*MI->getParent()) << ": " << *MI);
 
   // Splice the instruction to the preheader.
   MachineBasicBlock *MBB = MI->getParent();
@@ -580,14 +601,14 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
 }
 
 void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
+  DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
   // Remember livein register pressure.
   BackTrace.push_back(RegPressure);
 }
 
 void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
+  DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
   BackTrace.pop_back();
 }
 
@@ -617,7 +638,6 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
 /// specified header block, and that are in the current loop) in depth first
 /// order w.r.t the DominatorTree. This allows us to visit definitions before
 /// uses, allowing us to hoist a loop body in one pass without iteration.
-///
 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
   MachineBasicBlock *Preheader = getCurPreheader();
   if (!Preheader)
@@ -836,7 +856,7 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
 /// Return true if this machine instruction loads from global offset table or
 /// constant pool.
 static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
-  assert (MI.mayLoad() && "Expected MI that loads!");
+  assert(MI.mayLoad() && "Expected MI that loads!");
 
   // If we lost memory operands, conservatively assume that the instruction
   // reads from everything..
@@ -876,7 +896,6 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
 /// I.e., all virtual register operands are defined outside of the loop,
 /// physical registers aren't accessed explicitly, and there are no side
 /// effects that aren't captured by the operands or other flags.
-///
 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
   if (!IsLICMCandidate(I))
     return false;
@@ -898,8 +917,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
         // However, if the physreg is known to always be caller saved/restored
         // then this use is safe to hoist.
         if (!MRI->isConstantPhysReg(Reg) &&
-            !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent())))
-            return false;
+            !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF())))
+          return false;
         // Otherwise it's safe to move.
         continue;
       } else if (!MO.isDead()) {
@@ -928,7 +947,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
   return true;
 }
 
-
 /// Return true if the specified instruction is used by a phi node and hoisting
 /// it could cause a copy to be inserted.
 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
@@ -1173,7 +1191,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
                                     &LoadRegIndex);
   if (NewOpc == 0) return nullptr;
   const MCInstrDesc &MID = TII->get(NewOpc);
-  MachineFunction &MF = *MI->getParent()->getParent();
+  MachineFunction &MF = *MI->getMF();
   const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
   // Ok, we're unfolding. Create a temporary register and do the unfold.
   unsigned Reg = MRI->createVirtualRegister(RC);
@@ -1233,7 +1251,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
 /// the existing instruction rather than hoisting the instruction to the
 /// preheader.
 bool MachineLICM::EliminateCSE(MachineInstr *MI,
-          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+          DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
   // the undef property onto uses.
   if (CI == CSEMap.end() || MI->isImplicitDef())
@@ -1292,7 +1310,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
 /// the loop.
 bool MachineLICM::MayCSE(MachineInstr *MI) {
   unsigned Opcode = MI->getOpcode();
-  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
     CI = CSEMap.find(Opcode);
   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
   // the undef property onto uses.
@@ -1318,9 +1336,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
   DEBUG({
       dbgs() << "Hoisting " << *MI;
       if (MI->getParent()->getBasicBlock())
-        dbgs() << " from BB#" << MI->getParent()->getNumber();
+        dbgs() << " from " << printMBBReference(*MI->getParent());
       if (Preheader->getBasicBlock())
-        dbgs() << " to BB#" << Preheader->getNumber();
+        dbgs() << " to " << printMBBReference(*Preheader);
       dbgs() << "\n";
     });
 
@@ -1333,7 +1351,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
 
   // Look for opportunity to CSE the hoisted instruction.
   unsigned Opcode = MI->getOpcode();
-  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+  DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
     CI = CSEMap.find(Opcode);
   if (!EliminateCSE(MI, CI)) {
     // Otherwise, splice the instruction to the preheader.
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 825290a438a6..8f0b89657d02 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
@@ -26,7 +27,6 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
@@ -207,9 +207,9 @@ bool MachineModuleInfo::doInitialization(Module &M) {
   ObjFileMMI = nullptr;
   CurCallSite = 0;
   DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
+  HasSplitStack = HasNosplitStack = false;
   AddrLabelSymbols = nullptr;
   TheModule = &M;
-
   return false;
 }
 
@@ -276,7 +276,8 @@ MachineModuleInfo::getOrCreateMachineFunction(const Function &F) {
   MachineFunction *MF;
   if (I.second) {
     // No pre-existing machine function, create a new one.
-    MF = new MachineFunction(&F, TM, NextFnNum++, *this);
+    const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
+    MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
     // Update the set entry.
     I.first->second.reset(MF);
   } else {
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 22d519e5d88f..07b173bc94f8 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,7 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/MC/MCSymbol.h"
+
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -24,21 +26,17 @@ using namespace llvm;
 void MachineModuleInfoMachO::anchor() {}
 void MachineModuleInfoELF::anchor() {}
 
-static int SortSymbolPair(const void *LHS, const void *RHS) {
-  typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
-  const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
-  const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
-  return LHSS->getName().compare(RHSS->getName());
+using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>;
+static int SortSymbolPair(const PairTy *LHS, const PairTy *RHS) {
+  return LHS->first->getName().compare(RHS->first->getName());
 }
 
 MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
     DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) {
   MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
 
-  if (!List.empty())
-    qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+  array_pod_sort(List.begin(), List.end(), SortSymbolPair);
 
   Map.clear();
   return List;
 }
-
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
new file mode 100644
index 000000000000..d17c481862a1
--- /dev/null
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -0,0 +1,936 @@
+//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Methods common to all machine operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<int>
+    PrintRegMaskNumRegs("print-regmask-num-regs",
+                        cl::desc("Number of registers to limit to when "
+                                 "printing regmask operands in IR dumps. "
+                                 "unlimited = -1"),
+                        cl::init(32), cl::Hidden);
+
+static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) {
+  if (const MachineInstr *MI = MO.getParent())
+    if (const MachineBasicBlock *MBB = MI->getParent())
+      if (const MachineFunction *MF = MBB->getParent())
+        return MF;
+  return nullptr;
+}
+static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
+  return const_cast<MachineFunction *>(
+      getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
+}
+
+void MachineOperand::setReg(unsigned Reg) {
+  if (getReg() == Reg)
+    return; // No change.
+
+  // Otherwise, we have to change the register.  If this operand is embedded
+  // into a machine function, we need to update the old and new register's
+  // use/def lists.
+  if (MachineFunction *MF = getMFIfAvailable(*this)) {
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    MRI.removeRegOperandFromUseList(this);
+    SmallContents.RegNo = Reg;
+    MRI.addRegOperandToUseList(this);
+    return;
+  }
+
+  // Otherwise, just change the register, no problem.  :)
+  SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+                                  const TargetRegisterInfo &TRI) {
+  assert(TargetRegisterInfo::isVirtualRegister(Reg));
+  if (SubIdx && getSubReg())
+    SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+  setReg(Reg);
+  if (SubIdx)
+    setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  if (getSubReg()) {
+    Reg = TRI.getSubReg(Reg, getSubReg());
+    // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+    // That won't happen in legal code.
+    setSubReg(0);
+    if (isDef())
+      setIsUndef(false);
+  }
+  setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+  assert(isReg() && "Wrong MachineOperand accessor");
+  assert((!Val || !isDebug()) && "Marking a debug operation as def");
+  if (IsDef == Val)
+    return;
+  assert(!IsDeadOrKill && "Changing def/use with dead/kill set not supported");
+  // MRI may keep uses and defs in different list positions.
+  if (MachineFunction *MF = getMFIfAvailable(*this)) {
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    MRI.removeRegOperandFromUseList(this);
+    IsDef = Val;
+    MRI.addRegOperandToUseList(this);
+    return;
+  }
+  IsDef = Val;
+}
+
+bool MachineOperand::isRenamable() const {
+  assert(isReg() && "Wrong MachineOperand accessor");
+  assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+         "isRenamable should only be checked on physical registers");
+  return IsRenamable;
+}
+
+void MachineOperand::setIsRenamable(bool Val) {
+  assert(isReg() && "Wrong MachineOperand accessor");
+  assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+         "setIsRenamable should only be called on physical registers");
+  if (const MachineInstr *MI = getParent())
+    if ((isDef() && MI->hasExtraDefRegAllocReq()) ||
+        (isUse() && MI->hasExtraSrcRegAllocReq()))
+      assert(!Val && "isRenamable should be false for "
+                     "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes");
+  IsRenamable = Val;
+}
+
+void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() {
+  if (const MachineInstr *MI = getParent())
+    if ((isDef() && MI->hasExtraDefRegAllocReq()) ||
+        (isUse() && MI->hasExtraSrcRegAllocReq()))
+      return;
+
+  setIsRenamable(true);
+}
+
+// If this operand is currently a register operand, and if this is in a
+// function, deregister the operand from the register's use/def list.
+void MachineOperand::removeRegFromUses() {
+  if (!isReg() || !isOnRegUseList())
+    return;
+
+  if (MachineFunction *MF = getMFIfAvailable(*this))
+    MF->getRegInfo().removeRegOperandFromUseList(this);
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value.  If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+  assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+  removeRegFromUses();
+
+  OpKind = MO_Immediate;
+  Contents.ImmVal = ImmVal;
+}
+
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+  assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+  removeRegFromUses();
+
+  OpKind = MO_FPImmediate;
+  Contents.CFP = FPImm;
+}
+
+void MachineOperand::ChangeToES(const char *SymName,
+                                unsigned char TargetFlags) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into an external symbol");
+
+  removeRegFromUses();
+
+  OpKind = MO_ExternalSymbol;
+  Contents.OffsetedInfo.Val.SymbolName = SymName;
+  setOffset(0); // Offset is always 0.
+  setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into an MCSymbol");
+
+  removeRegFromUses();
+
+  OpKind = MO_MCSymbol;
+  Contents.Sym = Sym;
+}
+
+void MachineOperand::ChangeToFrameIndex(int Idx) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into a FrameIndex");
+
+  removeRegFromUses();
+
+  OpKind = MO_FrameIndex;
+  setIndex(Idx);
+}
+
+void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
+                                         unsigned char TargetFlags) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into a FrameIndex");
+
+  removeRegFromUses();
+
+  OpKind = MO_TargetIndex;
+  setIndex(Idx);
+  setOffset(Offset);
+  setTargetFlags(TargetFlags);
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value.  If an operand is known to be an register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+                                      bool isKill, bool isDead, bool isUndef,
+                                      bool isDebug) {
+  MachineRegisterInfo *RegInfo = nullptr;
+  if (MachineFunction *MF = getMFIfAvailable(*this))
+    RegInfo = &MF->getRegInfo();
+  // If this operand is already a register operand, remove it from the
+  // register's use/def lists.
+  bool WasReg = isReg();
+  if (RegInfo && WasReg)
+    RegInfo->removeRegOperandFromUseList(this);
+
+  // Change this to a register and set the reg#.
+  assert(!(isDead && !isDef) && "Dead flag on non-def");
+  assert(!(isKill && isDef) && "Kill flag on def");
+  OpKind = MO_Register;
+  SmallContents.RegNo = Reg;
+  SubReg_TargetFlags = 0;
+  IsDef = isDef;
+  IsImp = isImp;
+  IsDeadOrKill = isKill | isDead;
+  IsRenamable = false;
+  IsUndef = isUndef;
+  IsInternalRead = false;
+  IsEarlyClobber = false;
+  IsDebug = isDebug;
+  // Ensure isOnRegUseList() returns false.
+  Contents.Reg.Prev = nullptr;
+  // Preserve the tie when the operand was already a register.
+  if (!WasReg)
+    TiedTo = 0;
+
+  // If this operand is embedded in a function, add the operand to the
+  // register's use/def list.
+  if (RegInfo)
+    RegInfo->addRegOperandToUseList(this);
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+  if (getType() != Other.getType() ||
+      getTargetFlags() != Other.getTargetFlags())
+    return false;
+
+  switch (getType()) {
+  case MachineOperand::MO_Register:
+    return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+           getSubReg() == Other.getSubReg();
+  case MachineOperand::MO_Immediate:
+    return getImm() == Other.getImm();
+  case MachineOperand::MO_CImmediate:
+    return getCImm() == Other.getCImm();
+  case MachineOperand::MO_FPImmediate:
+    return getFPImm() == Other.getFPImm();
+  case MachineOperand::MO_MachineBasicBlock:
+    return getMBB() == Other.getMBB();
+  case MachineOperand::MO_FrameIndex:
+    return getIndex() == Other.getIndex();
+  case MachineOperand::MO_ConstantPoolIndex:
+  case MachineOperand::MO_TargetIndex:
+    return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_JumpTableIndex:
+    return getIndex() == Other.getIndex();
+  case MachineOperand::MO_GlobalAddress:
+    return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+  case MachineOperand::MO_ExternalSymbol:
+    return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
+           getOffset() == Other.getOffset();
+  case MachineOperand::MO_BlockAddress:
+    return getBlockAddress() == Other.getBlockAddress() &&
+           getOffset() == Other.getOffset();
+  case MachineOperand::MO_RegisterMask:
+  case MachineOperand::MO_RegisterLiveOut: {
+    // Shallow compare of the two RegMasks
+    const uint32_t *RegMask = getRegMask();
+    const uint32_t *OtherRegMask = Other.getRegMask();
+    if (RegMask == OtherRegMask)
+      return true;
+
+    if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+      // Calculate the size of the RegMask
+      const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+      unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+      // Deep compare of the two RegMasks
+      return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+    }
+    // We don't know the size of the RegMask, so we can't deep compare the two
+    // reg masks.
+    return false;
+  }
+  case MachineOperand::MO_MCSymbol:
+    return getMCSymbol() == Other.getMCSymbol();
+  case MachineOperand::MO_CFIIndex:
+    return getCFIIndex() == Other.getCFIIndex();
+  case MachineOperand::MO_Metadata:
+    return getMetadata() == Other.getMetadata();
+  case MachineOperand::MO_IntrinsicID:
+    return getIntrinsicID() == Other.getIntrinsicID();
+  case MachineOperand::MO_Predicate:
+    return getPredicate() == Other.getPredicate();
+  }
+  llvm_unreachable("Invalid machine operand type");
+}
+
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    // Register operands don't have target flags.
+    return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+  case MachineOperand::MO_Immediate:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+  case MachineOperand::MO_CImmediate:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+  case MachineOperand::MO_FPImmediate:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+  case MachineOperand::MO_MachineBasicBlock:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+  case MachineOperand::MO_FrameIndex:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+  case MachineOperand::MO_ConstantPoolIndex:
+  case MachineOperand::MO_TargetIndex:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+                        MO.getOffset());
+  case MachineOperand::MO_JumpTableIndex:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+  case MachineOperand::MO_ExternalSymbol:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+                        MO.getSymbolName());
+  case MachineOperand::MO_GlobalAddress:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+                        MO.getOffset());
+  case MachineOperand::MO_BlockAddress:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(),
+                        MO.getOffset());
+  case MachineOperand::MO_RegisterMask:
+  case MachineOperand::MO_RegisterLiveOut:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+  case MachineOperand::MO_Metadata:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+  case MachineOperand::MO_MCSymbol:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+  case MachineOperand::MO_CFIIndex:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
+  case MachineOperand::MO_IntrinsicID:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
+  case MachineOperand::MO_Predicate:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+  }
+  llvm_unreachable("Invalid machine operand type");
+}
+
+// Try to crawl up to the machine function and get TRI and IntrinsicInfo from
+// it.
+static void tryToGetTargetInfo(const MachineOperand &MO,
+                               const TargetRegisterInfo *&TRI,
+                               const TargetIntrinsicInfo *&IntrinsicInfo) {
+  if (const MachineFunction *MF = getMFIfAvailable(MO)) {
+    TRI = MF->getSubtarget().getRegisterInfo();
+    IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
+  }
+}
+
+static void printOffset(raw_ostream &OS, int64_t Offset) {
+  if (Offset == 0)
+    return;
+  if (Offset < 0) {
+    OS << " - " << -Offset;
+    return;
+  }
+  OS << " + " << Offset;
+}
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "expected instruction info");
+  auto Indices = TII->getSerializableTargetIndices();
+  auto Found = find_if(Indices, [&](const std::pair<int, const char *> &I) {
+    return I.first == Index;
+  });
+  if (Found != Indices.end())
+    return Found->second;
+  return nullptr;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+  for (const auto &I : Flags) {
+    if (I.first == TF) {
+      return I.second;
+    }
+  }
+  return nullptr;
+}
+
+void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index,
+                                    const TargetRegisterInfo *TRI) {
+  OS << "%subreg.";
+  if (TRI)
+    OS << TRI->getSubRegIndexName(Index);
+  else
+    OS << Index;
+}
+
+void MachineOperand::printTargetFlags(raw_ostream &OS,
+                                      const MachineOperand &Op) {
+  if (!Op.getTargetFlags())
+    return;
+  const MachineFunction *MF = getMFIfAvailable(Op);
+  if (!MF)
+    return;
+
+  const auto *TII = MF->getSubtarget().getInstrInfo();
+  assert(TII && "expected instruction info");
+  auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+  OS << "target-flags(";
+  const bool HasDirectFlags = Flags.first;
+  const bool HasBitmaskFlags = Flags.second;
+  if (!HasDirectFlags && !HasBitmaskFlags) {
+    OS << "<unknown>) ";
+    return;
+  }
+  if (HasDirectFlags) {
+    if (const auto *Name = getTargetFlagName(TII, Flags.first))
+      OS << Name;
+    else
+      OS << "<unknown target flag>";
+  }
+  if (!HasBitmaskFlags) {
+    OS << ") ";
+    return;
+  }
+  bool IsCommaNeeded = HasDirectFlags;
+  unsigned BitMask = Flags.second;
+  auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+  for (const auto &Mask : BitMasks) {
+    // Check if the flag's bitmask has the bits of the current mask set.
+    if ((BitMask & Mask.first) == Mask.first) {
+      if (IsCommaNeeded)
+        OS << ", ";
+      IsCommaNeeded = true;
+      OS << Mask.second;
+      // Clear the bits which were serialized from the flag's bitmask.
+      BitMask &= ~(Mask.first);
+    }
+  }
+  if (BitMask) {
+    // When the resulting flag's bitmask isn't zero, we know that we didn't
+    // serialize all of the bit flags.
+    if (IsCommaNeeded)
+      OS << ", ";
+    OS << "<unknown bitmask target flag>";
+  }
+  OS << ") ";
+}
+
+void MachineOperand::printSymbol(raw_ostream &OS, MCSymbol &Sym) {
+  OS << "<mcsymbol " << Sym << ">";
+}
+
+void MachineOperand::printStackObjectReference(raw_ostream &OS,
+                                               unsigned FrameIndex,
+                                               bool IsFixed, StringRef Name) {
+  if (IsFixed) {
+    OS << "%fixed-stack." << FrameIndex;
+    return;
+  }
+
+  OS << "%stack." << FrameIndex;
+  if (!Name.empty())
+    OS << '.' << Name;
+}
+
+void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
+                           const TargetIntrinsicInfo *IntrinsicInfo) const {
+  tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
+  ModuleSlotTracker DummyMST(nullptr);
+  print(OS, DummyMST, LLT{}, /*PrintDef=*/false,
+        /*ShouldPrintRegisterTies=*/true,
+        /*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
+}
+
+void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+                           LLT TypeToPrint, bool PrintDef,
+                           bool ShouldPrintRegisterTies,
+                           unsigned TiedOperandIdx,
+                           const TargetRegisterInfo *TRI,
+                           const TargetIntrinsicInfo *IntrinsicInfo) const {
+  printTargetFlags(OS, *this);
+  switch (getType()) {
+  case MachineOperand::MO_Register: {
+    unsigned Reg = getReg();
+    if (isImplicit())
+      OS << (isDef() ? "implicit-def " : "implicit ");
+    else if (PrintDef && isDef())
+      // Print the 'def' flag only when the operand is defined after '='.
+      OS << "def ";
+    if (isInternalRead())
+      OS << "internal ";
+    if (isDead())
+      OS << "dead ";
+    if (isKill())
+      OS << "killed ";
+    if (isUndef())
+      OS << "undef ";
+    if (isEarlyClobber())
+      OS << "early-clobber ";
+    if (isDebug())
+      OS << "debug-use ";
+    if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
+      OS << "renamable ";
+    OS << printReg(Reg, TRI);
+    // Print the sub register.
+    if (unsigned SubReg = getSubReg()) {
+      if (TRI)
+        OS << '.' << TRI->getSubRegIndexName(SubReg);
+      else
+        OS << ".subreg" << SubReg;
+    }
+    // Print the register class / bank.
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+        const MachineRegisterInfo &MRI = MF->getRegInfo();
+        if (!PrintDef || MRI.def_empty(Reg)) {
+          OS << ':';
+          OS << printRegClassOrBank(Reg, MRI, TRI);
+        }
+      }
+    }
+    // Print ties.
+    if (ShouldPrintRegisterTies && isTied() && !isDef())
+      OS << "(tied-def " << TiedOperandIdx << ")";
+    // Print types.
+    if (TypeToPrint.isValid())
+      OS << '(' << TypeToPrint << ')';
+    break;
+  }
+  case MachineOperand::MO_Immediate:
+    OS << getImm();
+    break;
+  case MachineOperand::MO_CImmediate:
+    getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+    break;
+  case MachineOperand::MO_FPImmediate:
+    if (getFPImm()->getType()->isFloatTy()) {
+      OS << getFPImm()->getValueAPF().convertToFloat();
+    } else if (getFPImm()->getType()->isHalfTy()) {
+      APFloat APF = getFPImm()->getValueAPF();
+      bool Unused;
+      APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
+      OS << "half " << APF.convertToFloat();
+    } else if (getFPImm()->getType()->isFP128Ty()) {
+      APFloat APF = getFPImm()->getValueAPF();
+      SmallString<16> Str;
+      getFPImm()->getValueAPF().toString(Str);
+      OS << "quad " << Str;
+    } else if (getFPImm()->getType()->isX86_FP80Ty()) {
+      APFloat APF = getFPImm()->getValueAPF();
+      OS << "x86_fp80 0xK";
+      APInt API = APF.bitcastToAPInt();
+      OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+                                 /*Upper=*/true);
+      OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                 /*Upper=*/true);
+    } else {
+      OS << getFPImm()->getValueAPF().convertToDouble();
+    }
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << printMBBReference(*getMBB());
+    break;
+  case MachineOperand::MO_FrameIndex: {
+    int FrameIndex = getIndex();
+    bool IsFixed = false;
+    StringRef Name;
+    if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+      const MachineFrameInfo &MFI = MF->getFrameInfo();
+      IsFixed = MFI.isFixedObjectIndex(FrameIndex);
+      if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex))
+        if (Alloca->hasName())
+          Name = Alloca->getName();
+      if (IsFixed)
+        FrameIndex -= MFI.getObjectIndexBegin();
+    }
+    printStackObjectReference(OS, FrameIndex, IsFixed, Name);
+    break;
+  }
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "%const." << getIndex();
+    printOffset(OS, getOffset());
+    break;
+  case MachineOperand::MO_TargetIndex: {
+    OS << "target-index(";
+    const char *Name = "<unknown>";
+    if (const MachineFunction *MF = getMFIfAvailable(*this))
+      if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex()))
+        Name = TargetIndexName;
+    OS << Name << ')';
+    printOffset(OS, getOffset());
+    break;
+  }
+  case MachineOperand::MO_JumpTableIndex:
+    OS << printJumpTableEntryReference(getIndex());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
+    printOffset(OS, getOffset());
+    break;
+  case MachineOperand::MO_ExternalSymbol: {
+    StringRef Name = getSymbolName();
+    OS << '$';
+    if (Name.empty()) {
+      OS << "\"\"";
+    } else {
+      printLLVMNameWithoutPrefix(OS, Name);
+    }
+    printOffset(OS, getOffset());
+    break;
+  }
+  case MachineOperand::MO_BlockAddress:
+    OS << '<';
+    getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
+    if (getOffset())
+      OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_RegisterMask: {
+    OS << "<regmask";
+    if (TRI) {
+      unsigned NumRegsInMask = 0;
+      unsigned NumRegsEmitted = 0;
+      for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+        unsigned MaskWord = i / 32;
+        unsigned MaskBit = i % 32;
+        if (getRegMask()[MaskWord] & (1 << MaskBit)) {
+          if (PrintRegMaskNumRegs < 0 ||
+              NumRegsEmitted <= static_cast<unsigned>(PrintRegMaskNumRegs)) {
+            OS << " " << printReg(i, TRI);
+            NumRegsEmitted++;
+          }
+          NumRegsInMask++;
+        }
+      }
+      if (NumRegsEmitted != NumRegsInMask)
+        OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
+    } else {
+      OS << " ...";
+    }
+    OS << ">";
+    break;
+  }
+  case MachineOperand::MO_RegisterLiveOut: {
+    const uint32_t *RegMask = getRegLiveOut();
+    OS << "liveout(";
+    if (!TRI) {
+      OS << "<unknown>";
+    } else {
+      bool IsCommaNeeded = false;
+      for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+        if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+          if (IsCommaNeeded)
+            OS << ", ";
+          OS << printReg(Reg, TRI);
+          IsCommaNeeded = true;
+        }
+      }
+    }
+    OS << ")";
+    break;
+  }
+  case MachineOperand::MO_Metadata:
+    getMetadata()->printAsOperand(OS, MST);
+    break;
+  case MachineOperand::MO_MCSymbol:
+    printSymbol(OS, *getMCSymbol());
+    break;
+  case MachineOperand::MO_CFIIndex:
+    OS << "<call frame instruction>";
+    break;
+  case MachineOperand::MO_IntrinsicID: {
+    Intrinsic::ID ID = getIntrinsicID();
+    if (ID < Intrinsic::num_intrinsics)
+      OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
+    else if (IntrinsicInfo)
+      OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
+    else
+      OS << "<intrinsic:" << ID << '>';
+    break;
+  }
+  case MachineOperand::MO_Predicate: {
+    auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
+    OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
+       << CmpInst::getPredicateName(Pred) << '>';
+    break;
+  }
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineOperand::dump() const { dbgs() << *this << '\n'; }
+#endif
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; }
+
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size byte.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+                                           const DataLayout &DL) const {
+  if (!V.is<const Value *>())
+    return false;
+
+  const Value *BasePtr = V.get<const Value *>();
+  if (BasePtr == nullptr)
+    return false;
+
+  return isDereferenceableAndAlignedPointer(
+      BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
+  return MachinePointerInfo(MF.getPSVManager().getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// the specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
+                                                     int FI, int64_t Offset) {
+  return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
+  return MachinePointerInfo(MF.getPSVManager().getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
+  return MachinePointerInfo(MF.getPSVManager().getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
+                                                int64_t Offset, uint8_t ID) {
+  return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID);
+}
+
+MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
+  return MachinePointerInfo(MF.getDataLayout().getAllocaAddrSpace());
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
+                                     uint64_t s, unsigned int a,
+                                     const AAMDNodes &AAInfo,
+                                     const MDNode *Ranges, SyncScope::ID SSID,
+                                     AtomicOrdering Ordering,
+                                     AtomicOrdering FailureOrdering)
+    : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
+      AAInfo(AAInfo), Ranges(Ranges) {
+  assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
+          isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
+         "invalid pointer value");
+  assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+  assert((isLoad() || isStore()) && "Not a load/store!");
+
+  AtomicInfo.SSID = static_cast<unsigned>(SSID);
+  assert(getSyncScopeID() == SSID && "Value truncated");
+  AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
+  assert(getOrdering() == Ordering && "Value truncated");
+  AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
+  assert(getFailureOrdering() == FailureOrdering && "Value truncated");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(getOffset());
+  ID.AddInteger(Size);
+  ID.AddPointer(getOpaqueValue());
+  ID.AddInteger(getFlags());
+  ID.AddInteger(getBaseAlignment());
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+  // The Value and Offset may differ due to CSE. But the flags and size
+  // should be the same.
+  assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+  assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+  if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+    // Update the alignment value.
+    BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
+    // Also update the base and offset, because the new alignment may
+    // not be applicable with the old ones.
+    PtrInfo = MMO->PtrInfo;
+  }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+  return MinAlign(getBaseAlignment(), getOffset());
+}
+
+void MachineMemOperand::print(raw_ostream &OS) const {
+  ModuleSlotTracker DummyMST(nullptr);
+  print(OS, DummyMST);
+}
+void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
+  assert((isLoad() || isStore()) && "SV has to be a load, store or both.");
+
+  if (isVolatile())
+    OS << "Volatile ";
+
+  if (isLoad())
+    OS << "LD";
+  if (isStore())
+    OS << "ST";
+  OS << getSize();
+
+  // Print the address information.
+  OS << "[";
+  if (const Value *V = getValue())
+    V->printAsOperand(OS, /*PrintType=*/false, MST);
+  else if (const PseudoSourceValue *PSV = getPseudoValue())
+    PSV->printCustom(OS);
+  else
+    OS << "<unknown>";
+
+  unsigned AS = getAddrSpace();
+  if (AS != 0)
+    OS << "(addrspace=" << AS << ')';
+
+  // If the alignment of the memory reference itself differs from the alignment
+  // of the base pointer, print the base alignment explicitly, next to the base
+  // pointer.
+  if (getBaseAlignment() != getAlignment())
+    OS << "(align=" << getBaseAlignment() << ")";
+
+  if (getOffset() != 0)
+    OS << "+" << getOffset();
+  OS << "]";
+
+  // Print the alignment of the reference.
+  if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
+    OS << "(align=" << getAlignment() << ")";
+
+  // Print TBAA info.
+  if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
+    OS << "(tbaa=";
+    if (TBAAInfo->getNumOperands() > 0)
+      TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
+    else
+      OS << "<unknown>";
+    OS << ")";
+  }
+
+  // Print AA scope info.
+  if (const MDNode *ScopeInfo = getAAInfo().Scope) {
+    OS << "(alias.scope=";
+    if (ScopeInfo->getNumOperands() > 0)
+      for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
+        ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
+        if (i != ie - 1)
+          OS << ",";
+      }
+    else
+      OS << "<unknown>";
+    OS << ")";
+  }
+
+  // Print AA noalias scope info.
+  if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
+    OS << "(noalias=";
+    if (NoAliasInfo->getNumOperands() > 0)
+      for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
+        NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
+        if (i != ie - 1)
+          OS << ",";
+      }
+    else
+      OS << "<unknown>";
+    OS << ")";
+  }
+
+  if (const MDNode *Ranges = getRanges()) {
+    unsigned NumRanges = Ranges->getNumOperands();
+    if (NumRanges != 0) {
+      OS << "(ranges=";
+
+      for (unsigned I = 0; I != NumRanges; ++I) {
+        Ranges->getOperand(I)->printAsOperand(OS, MST);
+        if (I != NumRanges - 1)
+          OS << ',';
+      }
+
+      OS << ')';
+    }
+  }
+
+  if (isNonTemporal())
+    OS << "(nontemporal)";
+  if (isDereferenceable())
+    OS << "(dereferenceable)";
+  if (isInvariant())
+    OS << "(invariant)";
+  if (getFlags() & MOTargetFlag1)
+    OS << "(flag1)";
+  if (getFlags() & MOTargetFlag2)
+    OS << "(flag2)";
+  if (getFlags() & MOTargetFlag3)
+    OS << "(flag3)";
+}
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 73c3428a6e53..ca4452218da1 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -16,7 +16,6 @@
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/LLVMContext.h"
 
@@ -51,24 +50,15 @@ void MachineOptimizationRemarkEmitter::emit(
   auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
   computeHotness(OptDiag);
 
-  LLVMContext &Ctx = MF.getFunction()->getContext();
+  LLVMContext &Ctx = MF.getFunction().getContext();
 
-  // If a diagnostic has a hotness value, then only emit it if its hotness
-  // meets the threshold.
-  if (OptDiag.getHotness() &&
-      *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) {
+  // Only emit it if its hotness meets the threshold.
+  if (OptDiag.getHotness().getValueOr(0) <
+      Ctx.getDiagnosticsHotnessThreshold()) {
     return;
   }
 
-  yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
-  if (Out) {
-    auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
-    *Out << P;
-  }
-  // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
-  // from here to clang.
-  if (!OptDiag.isVerbose() || shouldEmitVerbose())
-    Ctx.diagnose(OptDiag);
+  Ctx.diagnose(OptDiag);
 }
 
 MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
@@ -81,7 +71,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
     MachineFunction &MF) {
   MachineBlockFrequencyInfo *MBFI;
 
-  if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested())
+  if (MF.getFunction().getContext().getDiagnosticsHotnessRequested())
     MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
   else
     MBFI = nullptr;
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index fd6b2427891d..e4eb8802ac66 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -15,6 +15,23 @@
 /// instructions. If a sequence of instructions appears often, then it ought
 /// to be beneficial to pull out into a function.
 ///
+/// The MachineOutliner communicates with a given target using hooks defined in
+/// TargetInstrInfo.h. The target supplies the outliner with information on how
+/// a specific sequence of instructions should be outlined. This information
+/// is used to deduce the number of instructions necessary to
+///
+/// * Create an outlined function
+/// * Call that outlined function
+///
+/// Targets must implement
+///   * getOutliningCandidateInfo
+///   * insertOutlinerEpilogue
+///   * insertOutlinedCall
+///   * insertOutlinerPrologue
+///   * isFunctionSafeToOutlineFrom
+///
+/// in order to make use of the MachineOutliner.
+///
 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
 /// how this pass works, the talk is available on YouTube at
@@ -42,19 +59,17 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <functional>
 #include <map>
 #include <sstream>
@@ -64,6 +79,7 @@
 #define DEBUG_TYPE "machine-outliner"
 
 using namespace llvm;
+using namespace ore;
 
 STATISTIC(NumOutlined, "Number of candidates outlined");
 STATISTIC(FunctionsCreated, "Number of functions created");
@@ -73,19 +89,32 @@ namespace {
 /// \brief An individual sequence of instructions to be replaced with a call to
 /// an outlined function.
 struct Candidate {
+private:
+  /// The start index of this \p Candidate in the instruction list.
+  unsigned StartIdx;
 
+  /// The number of instructions in this \p Candidate.
+  unsigned Len;
+
+public:
   /// Set to false if the candidate overlapped with another candidate.
   bool InCandidateList = true;
 
-  /// The start index of this \p Candidate.
-  size_t StartIdx;
+  /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of
+  /// \p OutlinedFunctions.
+  unsigned FunctionIdx;
 
-  /// The number of instructions in this \p Candidate.
-  size_t Len;
+  /// Contains all target-specific information for this \p Candidate.
+  TargetInstrInfo::MachineOutlinerInfo MInfo;
 
-  /// The index of this \p Candidate's \p OutlinedFunction in the list of
-  /// \p OutlinedFunctions.
-  size_t FunctionIdx;
+  /// Return the number of instructions in this Candidate.
+  unsigned getLength() const { return Len; }
+
+  /// Return the start index of this candidate.
+  unsigned getStartIdx() const { return StartIdx; }
+
+  // Return the end index of this candidate.
+  unsigned getEndIdx() const { return StartIdx + Len - 1; }
 
   /// \brief The number of instructions that would be saved by outlining every
   /// candidate of this type.
@@ -96,51 +125,79 @@ struct Candidate {
   /// for some given candidate.
   unsigned Benefit = 0;
 
-  Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx)
+  Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx)
       : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
 
   Candidate() {}
 
   /// \brief Used to ensure that \p Candidates are outlined in an order that
   /// preserves the start and end indices of other \p Candidates.
-  bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
+  bool operator<(const Candidate &RHS) const {
+    return getStartIdx() > RHS.getStartIdx();
+  }
 };
 
 /// \brief The information necessary to create an outlined function for some
 /// class of candidate.
 struct OutlinedFunction {
 
+private:
+  /// The number of candidates for this \p OutlinedFunction.
+  unsigned OccurrenceCount = 0;
+
+public:
+  std::vector<std::shared_ptr<Candidate>> Candidates;
+
   /// The actual outlined function created.
   /// This is initialized after we go through and create the actual function.
   MachineFunction *MF = nullptr;
 
   /// A number assigned to this function which appears at the end of its name.
-  size_t Name;
-
-  /// The number of candidates for this OutlinedFunction.
-  size_t OccurrenceCount = 0;
+  unsigned Name;
 
   /// \brief The sequence of integers corresponding to the instructions in this
   /// function.
   std::vector<unsigned> Sequence;
 
-  /// The number of instructions this function would save.
-  unsigned Benefit = 0;
+  /// Contains all target-specific information for this \p OutlinedFunction.
+  TargetInstrInfo::MachineOutlinerInfo MInfo;
+
+  /// Return the number of candidates for this \p OutlinedFunction.
+  unsigned getOccurrenceCount() { return OccurrenceCount; }
+
+  /// Decrement the occurrence count of this OutlinedFunction and return the
+  /// new count.
+  unsigned decrement() {
+    assert(OccurrenceCount > 0 && "Can't decrement an empty function!");
+    OccurrenceCount--;
+    return getOccurrenceCount();
+  }
+
+  /// \brief Return the number of instructions it would take to outline this
+  /// function.
+  unsigned getOutliningCost() {
+    return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() +
+           MInfo.FrameOverhead;
+  }
 
-  /// \brief Set to true if candidates for this outlined function should be
-  /// replaced with tail calls to this OutlinedFunction.
-  bool IsTailCall = false;
+  /// \brief Return the number of instructions that would be saved by outlining
+  /// this function.
+  unsigned getBenefit() {
+    unsigned NotOutlinedCost = OccurrenceCount * Sequence.size();
+    unsigned OutlinedCost = getOutliningCost();
+    return (NotOutlinedCost < OutlinedCost) ? 0
+                                            : NotOutlinedCost - OutlinedCost;
+  }
 
-  OutlinedFunction(size_t Name, size_t OccurrenceCount,
+  OutlinedFunction(unsigned Name, unsigned OccurrenceCount,
                    const std::vector<unsigned> &Sequence,
-                   unsigned Benefit, bool IsTailCall)
-      : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
-        Benefit(Benefit), IsTailCall(IsTailCall)
-        {}
+                   TargetInstrInfo::MachineOutlinerInfo &MInfo)
+      : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence),
+        MInfo(MInfo) {}
 };
 
 /// Represents an undefined index in the suffix tree.
-const size_t EmptyIdx = -1;
+const unsigned EmptyIdx = -1;
 
 /// A node in a suffix tree which represents a substring or suffix.
 ///
@@ -170,7 +227,7 @@ struct SuffixTreeNode {
   bool IsInTree = true;
 
   /// The start index of this node's substring in the main string.
-  size_t StartIdx = EmptyIdx;
+  unsigned StartIdx = EmptyIdx;
 
   /// The end index of this node's substring in the main string.
   ///
@@ -178,24 +235,23 @@ struct SuffixTreeNode {
   /// step in the construction algorithm. To avoid having to update O(N)
   /// nodes individually at the end of every step, the end index is stored
   /// as a pointer.
-  size_t *EndIdx = nullptr;
+  unsigned *EndIdx = nullptr;
 
   /// For leaves, the start index of the suffix represented by this node.
   ///
   /// For all other nodes, this is ignored.
-  size_t SuffixIdx = EmptyIdx;
+  unsigned SuffixIdx = EmptyIdx;
 
   /// \brief For internal nodes, a pointer to the internal node representing
   /// the same sequence with the first character chopped off.
   ///
-  /// This has two major purposes in the suffix tree. The first is as a
-  /// shortcut in Ukkonen's construction algorithm. One of the things that
+  /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
   /// Ukkonen's algorithm does to achieve linear-time construction is
   /// keep track of which node the next insert should be at. This makes each
   /// insert O(1), and there are a total of O(N) inserts. The suffix link
   /// helps with inserting children of internal nodes.
   ///
-  /// Say we add a child to an internal node with associated mapping S. The 
+  /// Say we add a child to an internal node with associated mapping S. The
   /// next insertion must be at the node representing S - its first character.
   /// This is given by the way that we iteratively build the tree in Ukkonen's
   /// algorithm. The main idea is to look at the suffixes of each prefix in the
@@ -204,27 +260,6 @@ struct SuffixTreeNode {
   /// move to the next insertion point in O(1) time. If we don't, then we'd
   /// have to query from the root, which takes O(N) time. This would make the
   /// construction algorithm O(N^2) rather than O(N).
-  ///
-  /// The suffix link is also used during the tree pruning process to let us
-  /// quickly throw out a bunch of potential overlaps. Say we have a sequence
-  /// S we want to outline. Then each of its suffixes contribute to at least
-  /// one overlapping case. Therefore, we can follow the suffix links
-  /// starting at the node associated with S to the root and "delete" those
-  /// nodes, save for the root. For each candidate, this removes
-  /// O(|candidate|) overlaps from the search space. We don't actually
-  /// completely invalidate these nodes though; doing that is far too
-  /// aggressive. Consider the following pathological string:
-  ///
-  /// 1 2 3 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3
-  ///
-  /// If we, for the sake of example, outlined 1 2 3, then we would throw
-  /// out all instances of 2 3. This isn't desirable. To get around this,
-  /// when we visit a link node, we decrement its occurrence count by the
-  /// number of sequences we outlined in the current step. In the pathological
-  /// example, the 2 3 node would have an occurrence count of 8, while the
-  /// 1 2 3 node would have an occurrence count of 2. Thus, the 2 3 node
-  /// would survive to the next round allowing us to outline the extra
-  /// instances of 2 3.
   SuffixTreeNode *Link = nullptr;
 
   /// The parent of this node. Every node except for the root has a parent.
@@ -234,11 +269,11 @@ struct SuffixTreeNode {
   ///
   /// This is equal to the number of leaf children of the string. It represents
   /// the number of suffixes that the node's string is a prefix of.
-  size_t OccurrenceCount = 0;
+  unsigned OccurrenceCount = 0;
 
   /// The length of the string formed by concatenating the edge labels from the
   /// root to this node.
-  size_t ConcatLen = 0;
+  unsigned ConcatLen = 0;
 
   /// Returns true if this node is a leaf.
   bool isLeaf() const { return SuffixIdx != EmptyIdx; }
@@ -260,7 +295,7 @@ struct SuffixTreeNode {
     return *EndIdx - StartIdx + 1;
   }
 
-  SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
+  SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link,
                  SuffixTreeNode *Parent)
       : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
 
@@ -290,10 +325,16 @@ struct SuffixTreeNode {
 ///
 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
 class SuffixTree {
-private:
+public:
+  /// Stores each leaf node in the tree.
+  ///
+  /// This is used for finding outlining candidates.
+  std::vector<SuffixTreeNode *> LeafVector;
+
   /// Each element is an integer representing an instruction in the module.
   ArrayRef<unsigned> Str;
 
+private:
   /// Maintains each node in the tree.
   SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
 
@@ -303,11 +344,6 @@ private:
   /// \p NodeAllocator like every other node in the tree.
   SuffixTreeNode *Root = nullptr;
 
-  /// Stores each leaf node in the tree.
-  ///
-  /// This is used for finding outlining candidates.
-  std::vector<SuffixTreeNode *> LeafVector;
-
   /// Maintains the end indices of the internal nodes in the tree.
   ///
   /// Each internal node is guaranteed to never have its end index change
@@ -318,7 +354,7 @@ private:
   BumpPtrAllocator InternalEndIdxAllocator;
 
   /// The end index of each leaf in the tree.
-  size_t LeafEndIdx = -1;
+  unsigned LeafEndIdx = -1;
 
   /// \brief Helper struct which keeps track of the next insertion point in
   /// Ukkonen's algorithm.
@@ -327,10 +363,10 @@ private:
     SuffixTreeNode *Node;
 
     /// The index of the first character in the substring currently being added.
-    size_t Idx = EmptyIdx;
+    unsigned Idx = EmptyIdx;
 
     /// The length of the substring we have to add at the current step.
-    size_t Len = 0;
+    unsigned Len = 0;
   };
 
   /// \brief The point the next insertion will take place at in the
@@ -344,15 +380,13 @@ private:
   /// \param Edge The label on the edge leaving \p Parent to this node.
   ///
   /// \returns A pointer to the allocated leaf node.
-  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
+  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
                              unsigned Edge) {
 
     assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
 
-    SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, 
-                                                                   &LeafEndIdx,
-                                                                       nullptr,
-                                                                      &Parent);
+    SuffixTreeNode *N = new (NodeAllocator.Allocate())
+        SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent);
     Parent.Children[Edge] = N;
 
     return N;
@@ -366,18 +400,16 @@ private:
   /// \param Edge The label on the edge leaving \p Parent to this node.
   ///
   /// \returns A pointer to the allocated internal node.
-  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
-                                     size_t EndIdx, unsigned Edge) {
+  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
+                                     unsigned EndIdx, unsigned Edge) {
 
     assert(StartIdx <= EndIdx && "String can't start after it ends!");
     assert(!(!Parent && StartIdx != EmptyIdx) &&
-    "Non-root internal nodes must have parents!");
+           "Non-root internal nodes must have parents!");
 
-    size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
-    SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
-                                                                      E,
-                                                                      Root,
-                                                                      Parent);
+    unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
+    SuffixTreeNode *N = new (NodeAllocator.Allocate())
+        SuffixTreeNode(StartIdx, E, Root, Parent);
     if (Parent)
       Parent->Children[Edge] = N;
 
@@ -390,7 +422,7 @@ private:
   ///
   /// \param[in] CurrNode The node currently being visited.
   /// \param CurrIdx The current index of the string being visited.
-  void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
+  void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) {
 
     bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
 
@@ -401,14 +433,13 @@ private:
         CurrNode.ConcatLen = CurrNode.size();
 
       if (CurrNode.Parent)
-       CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
+        CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
     }
 
     // Traverse the tree depth-first.
     for (auto &ChildPair : CurrNode.Children) {
       assert(ChildPair.second && "Node had a null child!");
-      setSuffixIndices(*ChildPair.second,
-                       CurrIdx + ChildPair.second->size());
+      setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size());
     }
 
     // Is this node a leaf?
@@ -437,11 +468,11 @@ private:
   ///
   /// \returns The number of suffixes that have not been added at the end of
   /// this step.
-  unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
+  unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
     SuffixTreeNode *NeedsLink = nullptr;
 
     while (SuffixesToAdd > 0) {
-    
+
       // Are we waiting to add anything other than just the last character?
       if (Active.Len == 0) {
         // If not, then say the active index is the end index.
@@ -469,7 +500,7 @@ private:
         // insert a new node.
         SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
 
-        size_t SubstringLen = NextNode->size();
+        unsigned SubstringLen = NextNode->size();
 
         // Is the current suffix we're trying to insert longer than the size of
         // the child we want to move to?
@@ -515,10 +546,8 @@ private:
 
         // The node s from the diagram
         SuffixTreeNode *SplitNode =
-            insertInternalNode(Active.Node,
-                               NextNode->StartIdx,
-                               NextNode->StartIdx + Active.Len - 1,
-                               FirstChar);
+            insertInternalNode(Active.Node, NextNode->StartIdx,
+                               NextNode->StartIdx + Active.Len - 1, FirstChar);
 
         // Insert the new node representing the new substring into the tree as
         // a child of the split node. This is the node l from the diagram.
@@ -556,87 +585,6 @@ private:
   }
 
 public:
-
-  /// Find all repeated substrings that satisfy \p BenefitFn.
-  ///
-  /// If a substring appears at least twice, then it must be represented by
-  /// an internal node which appears in at least two suffixes. Each suffix is
-  /// represented by a leaf node. To do this, we visit each internal node in
-  /// the tree, using the leaf children of each internal node. If an internal
-  /// node represents a beneficial substring, then we use each of its leaf
-  /// children to find the locations of its substring.
-  ///
-  /// \param[out] CandidateList Filled with candidates representing each
-  /// beneficial substring.
-  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
-  /// type of candidate.
-  /// \param BenefitFn The function to satisfy.
-  ///
-  /// \returns The length of the longest candidate found.
-  size_t findCandidates(std::vector<Candidate> &CandidateList,
-  std::vector<OutlinedFunction> &FunctionList,
-  const std::function<unsigned(SuffixTreeNode &, size_t, unsigned)>
-  &BenefitFn) {
-
-    CandidateList.clear();
-    FunctionList.clear();
-    size_t FnIdx = 0;
-    size_t MaxLen = 0;
-
-    for (SuffixTreeNode* Leaf : LeafVector) {
-      assert(Leaf && "Leaves in LeafVector cannot be null!");
-      if (!Leaf->IsInTree)
-        continue;
-
-      assert(Leaf->Parent && "All leaves must have parents!");
-      SuffixTreeNode &Parent = *(Leaf->Parent);
-
-      // If it doesn't appear enough, or we already outlined from it, skip it.
-      if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
-        continue;
-
-      size_t StringLen = Leaf->ConcatLen - Leaf->size();
-
-      // How many instructions would outlining this string save?
-      unsigned Benefit = BenefitFn(Parent,
-        StringLen, Str[Leaf->SuffixIdx + StringLen - 1]);
-
-      // If it's not beneficial, skip it.
-      if (Benefit < 1)
-        continue;
-
-      if (StringLen > MaxLen)
-        MaxLen = StringLen;
-
-      unsigned OccurrenceCount = 0;
-      for (auto &ChildPair : Parent.Children) {
-        SuffixTreeNode *M = ChildPair.second;
-
-        // Is it a leaf? If so, we have an occurrence of this candidate.
-        if (M && M->IsInTree && M->isLeaf()) {
-          OccurrenceCount++;
-          CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx);
-          CandidateList.back().Benefit = Benefit;
-          M->IsInTree = false;
-        }
-      }
-
-      // Save the function for the new candidate sequence.
-      std::vector<unsigned> CandidateSequence;
-      for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
-        CandidateSequence.push_back(Str[i]);
-
-      FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence,
-                                Benefit, false);
-
-      // Move to the next function.
-      FnIdx++;
-      Parent.IsInTree = false;
-    }
-
-    return MaxLen;
-  }
- 
   /// Construct a suffix tree from a sequence of unsigned integers.
   ///
   /// \param Str The string to construct the suffix tree for.
@@ -644,17 +592,18 @@ public:
     Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
     Root->IsInTree = true;
     Active.Node = Root;
-    LeafVector = std::vector<SuffixTreeNode*>(Str.size());
+    LeafVector = std::vector<SuffixTreeNode *>(Str.size());
 
     // Keep track of the number of suffixes we have to add of the current
     // prefix.
-    size_t SuffixesToAdd = 0;
+    unsigned SuffixesToAdd = 0;
     Active.Node = Root;
 
     // Construct the suffix tree iteratively on each prefix of the string.
     // PfxEndIdx is the end index of the current prefix.
     // End is one past the last element in the string.
-    for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
+    for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
+         PfxEndIdx++) {
       SuffixesToAdd++;
       LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
       SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
@@ -708,9 +657,9 @@ struct InstructionMapper {
     MachineInstr &MI = *It;
     bool WasInserted;
     DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
-    ResultIt;
+        ResultIt;
     std::tie(ResultIt, WasInserted) =
-    InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
+        InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
     unsigned MINumber = ResultIt->second;
 
     // There was an insertion.
@@ -725,10 +674,10 @@ struct InstructionMapper {
     if (LegalInstrNumber >= IllegalInstrNumber)
       report_fatal_error("Instruction mapping overflow!");
 
-    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey()
-          && "Tried to assign DenseMap tombstone or empty key to instruction.");
-    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey()
-          && "Tried to assign DenseMap tombstone or empty key to instruction.");
+    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+           "Tried to assign DenseMap tombstone or empty key to instruction.");
+    assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+           "Tried to assign DenseMap tombstone or empty key to instruction.");
 
     return MINumber;
   }
@@ -748,13 +697,11 @@ struct InstructionMapper {
     assert(LegalInstrNumber < IllegalInstrNumber &&
            "Instruction mapping overflow!");
 
-    assert(IllegalInstrNumber !=
-      DenseMapInfo<unsigned>::getEmptyKey() &&
-      "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+    assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
+           "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
 
-    assert(IllegalInstrNumber !=
-      DenseMapInfo<unsigned>::getTombstoneKey() &&
-      "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+    assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
+           "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
 
     return MINumber;
   }
@@ -777,17 +724,17 @@ struct InstructionMapper {
          It++) {
 
       // Keep track of where this instruction is in the module.
-      switch(TII.getOutliningType(*It)) {
-        case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
-          mapToIllegalUnsigned(It);
-          break;
+      switch (TII.getOutliningType(*It)) {
+      case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
+        mapToIllegalUnsigned(It);
+        break;
 
-        case TargetInstrInfo::MachineOutlinerInstrType::Legal:
-          mapToLegalUnsigned(It);
-          break;
+      case TargetInstrInfo::MachineOutlinerInstrType::Legal:
+        mapToLegalUnsigned(It);
+        break;
 
-        case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
-          break;
+      case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
+        break;
       }
     }
 
@@ -804,9 +751,9 @@ struct InstructionMapper {
     // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
     // changed.
     assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
-                "DenseMapInfo<unsigned>'s empty key isn't -1!");
+           "DenseMapInfo<unsigned>'s empty key isn't -1!");
     assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
-                "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+           "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
   }
 };
 
@@ -823,6 +770,10 @@ struct MachineOutliner : public ModulePass {
 
   static char ID;
 
+  /// \brief Set to true if the outliner should consider functions with
+  /// linkonceodr linkage.
+  bool OutlineFromLinkOnceODRs = false;
+
   StringRef getPassName() const override { return "Machine Outliner"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -832,10 +783,35 @@ struct MachineOutliner : public ModulePass {
     ModulePass::getAnalysisUsage(AU);
   }
 
-  MachineOutliner() : ModulePass(ID) {
+  MachineOutliner(bool OutlineFromLinkOnceODRs = false)
+      : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) {
     initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
   }
 
+  /// Find all repeated substrings that satisfy the outlining cost model.
+  ///
+  /// If a substring appears at least twice, then it must be represented by
+  /// an internal node which appears in at least two suffixes. Each suffix is
+  /// represented by a leaf node. To do this, we visit each internal node in
+  /// the tree, using the leaf children of each internal node. If an internal
+  /// node represents a beneficial substring, then we use each of its leaf
+  /// children to find the locations of its substring.
+  ///
+  /// \param ST A suffix tree to query.
+  /// \param TII TargetInstrInfo for the target.
+  /// \param Mapper Contains outlining mapping information.
+  /// \param[out] CandidateList Filled with candidates representing each
+  /// beneficial substring.
+  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each
+  /// type of candidate.
+  ///
+  /// \returns The length of the longest candidate found.
+  unsigned
+  findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
+                 InstructionMapper &Mapper,
+                 std::vector<std::shared_ptr<Candidate>> &CandidateList,
+                 std::vector<OutlinedFunction> &FunctionList);
+
   /// \brief Replace the sequences of instructions represented by the
   /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
   /// described in \p FunctionList.
@@ -844,7 +820,8 @@ struct MachineOutliner : public ModulePass {
   /// \param CandidateList A list of candidates to be outlined.
   /// \param FunctionList A list of functions to be inserted into the module.
   /// \param Mapper Contains the instruction mappings for the module.
-  bool outline(Module &M, const ArrayRef<Candidate> &CandidateList,
+  bool outline(Module &M,
+               const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
                std::vector<OutlinedFunction> &FunctionList,
                InstructionMapper &Mapper);
 
@@ -865,11 +842,15 @@ struct MachineOutliner : public ModulePass {
   /// \param TII TargetInstrInfo for the module.
   ///
   /// \returns The length of the longest candidate found. 0 if there are none.
-  unsigned buildCandidateList(std::vector<Candidate> &CandidateList,
-                              std::vector<OutlinedFunction> &FunctionList,
-                              SuffixTree &ST,
-                              InstructionMapper &Mapper,
-                              const TargetInstrInfo &TII);
+  unsigned
+  buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList,
+                     std::vector<OutlinedFunction> &FunctionList,
+                     SuffixTree &ST, InstructionMapper &Mapper,
+                     const TargetInstrInfo &TII);
+
+  /// Helper function for pruneOverlaps.
+  /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.
+  void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList);
 
   /// \brief Remove any overlapping candidates that weren't handled by the
   /// suffix tree's pruning method.
@@ -881,11 +862,12 @@ struct MachineOutliner : public ModulePass {
   ///
   /// \param[in,out] CandidateList A list of outlining candidates.
   /// \param[in,out] FunctionList A list of functions to be outlined.
+  /// \param Mapper Contains instruction mapping info for outlining.
   /// \param MaxCandidateLen The length of the longest candidate.
   /// \param TII TargetInstrInfo for the module.
-  void pruneOverlaps(std::vector<Candidate> &CandidateList,
+  void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList,
                      std::vector<OutlinedFunction> &FunctionList,
-                     unsigned MaxCandidateLen,
+                     InstructionMapper &Mapper, unsigned MaxCandidateLen,
                      const TargetInstrInfo &TII);
 
   /// Construct a suffix tree on the instructions in \p M and outline repeated
@@ -898,16 +880,223 @@ struct MachineOutliner : public ModulePass {
 char MachineOutliner::ID = 0;
 
 namespace llvm {
-ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
+ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) {
+  return new MachineOutliner(OutlineFromLinkOnceODRs);
+}
+
+} // namespace llvm
+
+INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
+                false)
+
+unsigned MachineOutliner::findCandidates(
+    SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,
+    std::vector<std::shared_ptr<Candidate>> &CandidateList,
+    std::vector<OutlinedFunction> &FunctionList) {
+  CandidateList.clear();
+  FunctionList.clear();
+  unsigned MaxLen = 0;
+
+  // FIXME: Visit internal nodes instead of leaves.
+  for (SuffixTreeNode *Leaf : ST.LeafVector) {
+    assert(Leaf && "Leaves in LeafVector cannot be null!");
+    if (!Leaf->IsInTree)
+      continue;
+
+    assert(Leaf->Parent && "All leaves must have parents!");
+    SuffixTreeNode &Parent = *(Leaf->Parent);
+
+    // If it doesn't appear enough, or we already outlined from it, skip it.
+    if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
+      continue;
+
+    // Figure out if this candidate is beneficial.
+    unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size();
+
+    // Too short to be beneficial; skip it.
+    // FIXME: This isn't necessarily true for, say, X86. If we factor in
+    // instruction lengths we need more information than this.
+    if (StringLen < 2)
+      continue;
+
+    // If this is a beneficial class of candidate, then every one is stored in
+    // this vector.
+    std::vector<Candidate> CandidatesForRepeatedSeq;
+
+    // Describes the start and end point of each candidate. This allows the
+    // target to infer some information about each occurrence of each repeated
+    // sequence.
+    // FIXME: CandidatesForRepeatedSeq and this should be combined.
+    std::vector<
+        std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+        RepeatedSequenceLocs;
+
+    // Figure out the call overhead for each instance of the sequence.
+    for (auto &ChildPair : Parent.Children) {
+      SuffixTreeNode *M = ChildPair.second;
+
+      if (M && M->IsInTree && M->isLeaf()) {
+        // Never visit this leaf again.
+        M->IsInTree = false;
+        unsigned StartIdx = M->SuffixIdx;
+        unsigned EndIdx = StartIdx + StringLen - 1;
+
+        // Trick: Discard some candidates that would be incompatible with the
+        // ones we've already found for this sequence. This will save us some
+        // work in candidate selection.
+        //
+        // If two candidates overlap, then we can't outline them both. This
+        // happens when we have candidates that look like, say
+        //
+        // AA (where each "A" is an instruction).
+        //
+        // We might have some portion of the module that looks like this:
+        // AAAAAA (6 A's) 
+        //
+        // In this case, there are 5 different copies of "AA" in this range, but
+        // at most 3 can be outlined. If only outlining 3 of these is going to
+        // be unbeneficial, then we ought to not bother.
+        //
+        // Note that two things DON'T overlap when they look like this:
+        // start1...end1 .... start2...end2
+        // That is, one must either
+        // * End before the other starts
+        // * Start after the other ends
+        if (std::all_of(CandidatesForRepeatedSeq.begin(),
+                        CandidatesForRepeatedSeq.end(),
+                        [&StartIdx, &EndIdx](const Candidate &C) {
+                          return (EndIdx < C.getStartIdx() ||
+                                  StartIdx > C.getEndIdx()); 
+                        })) {
+          // It doesn't overlap with anything, so we can outline it.
+          // Each sequence is over [StartIt, EndIt].
+          MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+          MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+
+          // Save the candidate and its location.
+          CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen,
+                                                FunctionList.size());
+          RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));
+        }
+      }
+    }
+
+    // We've found something we might want to outline.
+    // Create an OutlinedFunction to store it and check if it'd be beneficial
+    // to outline.
+    TargetInstrInfo::MachineOutlinerInfo MInfo =
+        TII.getOutlininingCandidateInfo(RepeatedSequenceLocs);
+    std::vector<unsigned> Seq;
+    for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
+      Seq.push_back(ST.Str[i]);
+    OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(),
+                        Seq, MInfo);
+    unsigned Benefit = OF.getBenefit();
+
+    // Is it better to outline this candidate than not?
+    if (Benefit < 1) {
+      // Outlining this candidate would take more instructions than not
+      // outlining.
+      // Emit a remark explaining why we didn't outline this candidate.
+      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C =
+          RepeatedSequenceLocs[0];
+      MachineOptimizationRemarkEmitter MORE(
+          *(C.first->getParent()->getParent()), nullptr);
+      MORE.emit([&]() {
+        MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
+                                          C.first->getDebugLoc(),
+                                          C.first->getParent());
+        R << "Did not outline " << NV("Length", StringLen) << " instructions"
+          << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size())
+          << " locations."
+          << " Instructions from outlining all occurrences ("
+          << NV("OutliningCost", OF.getOutliningCost()) << ")"
+          << " >= Unoutlined instruction count ("
+          << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")"
+          << " (Also found at: ";
+
+        // Tell the user the other places the candidate was found.
+        for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) {
+          R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
+                  RepeatedSequenceLocs[i].first->getDebugLoc());
+          if (i != e - 1)
+            R << ", ";
+        }
+
+        R << ")";
+        return R;
+      });
+
+      // Move to the next candidate.
+      continue;
+    }
+
+    if (StringLen > MaxLen)
+      MaxLen = StringLen;
+
+    // At this point, the candidate class is seen as beneficial. Set their
+    // benefit values and save them in the candidate list.
+    std::vector<std::shared_ptr<Candidate>> CandidatesForFn;
+    for (Candidate &C : CandidatesForRepeatedSeq) {
+      C.Benefit = Benefit;
+      C.MInfo = MInfo;
+      std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C);
+      CandidateList.push_back(Cptr);
+      CandidatesForFn.push_back(Cptr);
+    }
+
+    FunctionList.push_back(OF);
+    FunctionList.back().Candidates = CandidatesForFn;
+
+    // Move to the next function.
+    Parent.IsInTree = false;
+  }
+
+  return MaxLen;
+}
+
+// Remove C from the candidate space, and update its OutlinedFunction.
+void MachineOutliner::prune(Candidate &C,
+                            std::vector<OutlinedFunction> &FunctionList) {
+  // Get the OutlinedFunction associated with this Candidate.
+  OutlinedFunction &F = FunctionList[C.FunctionIdx];
+
+  // Update C's associated function's occurrence count.
+  F.decrement();
+
+  // Remove C from the CandidateList.
+  C.InCandidateList = false;
+
+  DEBUG(dbgs() << "- Removed a Candidate \n";
+        dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount()
+               << "\n";
+        dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit()
+               << "\n";);
 }
 
-INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE,
-                "Machine Function Outliner", false, false)
+void MachineOutliner::pruneOverlaps(
+    std::vector<std::shared_ptr<Candidate>> &CandidateList,
+    std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper,
+    unsigned MaxCandidateLen, const TargetInstrInfo &TII) {
+
+  // Return true if this candidate became unbeneficial for outlining in a
+  // previous step.
+  auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) {
+
+    // Check if the candidate was removed in a previous step.
+    if (!C.InCandidateList)
+      return true;
+
+    // C must be alive. Check if we should remove it.
+    if (FunctionList[C.FunctionIdx].getBenefit() < 1) {
+      prune(C, FunctionList);
+      return true;
+    }
+
+    // C is in the list, and F is still beneficial.
+    return false;
+  };
 
-void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
-                                    std::vector<OutlinedFunction> &FunctionList,
-                                    unsigned MaxCandidateLen,
-                                    const TargetInstrInfo &TII) {
   // TODO: Experiment with interval trees or other interval-checking structures
   // to lower the time complexity of this function.
   // TODO: Can we do better than the simple greedy choice?
@@ -915,56 +1104,35 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
   // This is O(MaxCandidateLen * CandidateList.size()).
   for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
        It++) {
-    Candidate &C1 = *It;
-    OutlinedFunction &F1 = FunctionList[C1.FunctionIdx];
+    Candidate &C1 = **It;
 
-    // If we removed this candidate, skip it.
-    if (!C1.InCandidateList)
+    // If C1 was already pruned, or its function is no longer beneficial for
+    // outlining, move to the next candidate.
+    if (ShouldSkipCandidate(C1))
       continue;
 
-    // Is it still worth it to outline C1?
-    if (F1.Benefit < 1 || F1.OccurrenceCount < 2) {
-      assert(F1.OccurrenceCount > 0 &&
-               "Can't remove OutlinedFunction with no occurrences!");
-      F1.OccurrenceCount--;
-      C1.InCandidateList = false;
-      continue;
-    }
-
     // The minimum start index of any candidate that could overlap with this
     // one.
     unsigned FarthestPossibleIdx = 0;
 
     // Either the index is 0, or it's at most MaxCandidateLen indices away.
-    if (C1.StartIdx > MaxCandidateLen)
-      FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen;
+    if (C1.getStartIdx() > MaxCandidateLen)
+      FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen;
 
     // Compare against the candidates in the list that start at at most
     // FarthestPossibleIdx indices away from C1. There are at most
     // MaxCandidateLen of these.
     for (auto Sit = It + 1; Sit != Et; Sit++) {
-      Candidate &C2 = *Sit;
-      OutlinedFunction &F2 = FunctionList[C2.FunctionIdx];
+      Candidate &C2 = **Sit;
 
       // Is this candidate too far away to overlap?
-      if (C2.StartIdx < FarthestPossibleIdx)
+      if (C2.getStartIdx() < FarthestPossibleIdx)
         break;
 
-      // Did we already remove this candidate in a previous step?
-      if (!C2.InCandidateList)
-        continue;
-
-      // Is the function beneficial to outline?
-      if (F2.OccurrenceCount < 2 || F2.Benefit < 1) {
-        // If not, remove this candidate and move to the next one.
-        assert(F2.OccurrenceCount > 0 &&
-               "Can't remove OutlinedFunction with no occurrences!");
-        F2.OccurrenceCount--;
-        C2.InCandidateList = false;
+      // If C2 was already pruned, or its function is no longer beneficial for
+      // outlining, move to the next candidate.
+      if (ShouldSkipCandidate(C2))
         continue;
-      }
-
-      size_t C2End = C2.StartIdx + C2.Len - 1;
 
       // Do C1 and C2 overlap?
       //
@@ -974,7 +1142,7 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
       // We sorted our candidate list so C2Start <= C1Start. We know that
       // C2End > C2Start since each candidate has length >= 2. Therefore, all we
       // have to check is C2End < C2Start to see if we overlap.
-      if (C2End < C1.StartIdx)
+      if (C2.getEndIdx() < C1.getStartIdx())
         continue;
 
       // C1 and C2 overlap.
@@ -982,118 +1150,52 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
       //
       // Approximate this by picking the one which would have saved us the
       // most instructions before any pruning.
-      if (C1.Benefit >= C2.Benefit) {
-
-        // C1 is better, so remove C2 and update C2's OutlinedFunction to
-        // reflect the removal.
-        assert(F2.OccurrenceCount > 0 &&
-               "Can't remove OutlinedFunction with no occurrences!");
-        F2.OccurrenceCount--;
-        F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
-                                             F2.OccurrenceCount,
-                                             F2.IsTailCall
-                                             );
-
-        C2.InCandidateList = false;
-
-        DEBUG (
-          dbgs() << "- Removed C2. \n";
-          dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount << "\n";
-          dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";
-        );
 
-      } else {
-        // C2 is better, so remove C1 and update C1's OutlinedFunction to
-        // reflect the removal.
-        assert(F1.OccurrenceCount > 0 &&
-               "Can't remove OutlinedFunction with no occurrences!");
-        F1.OccurrenceCount--;
-        F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
-                                             F1.OccurrenceCount,
-                                             F1.IsTailCall
-                                             );
-        C1.InCandidateList = false;
-
-        DEBUG (
-          dbgs() << "- Removed C1. \n";
-          dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount << "\n";
-          dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";
-        );
-
-        // C1 is out, so we don't have to compare it against anyone else.
+      // Is C2 a better candidate?
+      if (C2.Benefit > C1.Benefit) {
+        // Yes, so prune C1. Since C1 is dead, we don't have to compare it
+        // against anything anymore, so break.
+        prune(C1, FunctionList);
         break;
       }
+
+      // Prune C2 and move on to the next candidate.
+      prune(C2, FunctionList);
     }
   }
 }
 
-unsigned
-MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
-                                    std::vector<OutlinedFunction> &FunctionList,
-                                    SuffixTree &ST,
-                                    InstructionMapper &Mapper,
-                                    const TargetInstrInfo &TII) {
+unsigned MachineOutliner::buildCandidateList(
+    std::vector<std::shared_ptr<Candidate>> &CandidateList,
+    std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST,
+    InstructionMapper &Mapper, const TargetInstrInfo &TII) {
 
   std::vector<unsigned> CandidateSequence; // Current outlining candidate.
-  size_t MaxCandidateLen = 0; // Length of the longest candidate.
-
-  // Function for maximizing query in the suffix tree.
-  // This allows us to define more fine-grained types of things to outline in
-  // the target without putting target-specific info in the suffix tree.
-  auto BenefitFn = [&TII, &Mapper](const SuffixTreeNode &Curr,
-                                   size_t StringLen, unsigned EndVal) {
-
-    // The root represents the empty string.
-    if (Curr.isRoot())
-      return 0u;
-
-    // Is this long enough to outline?
-	// TODO: Let the target decide how "long" a string is in terms of the sizes
-	// of the instructions in the string. For example, if a call instruction
-	// is smaller than a one instruction string, we should outline that string.
-    if (StringLen < 2)
-      return 0u;
-
-    size_t Occurrences = Curr.OccurrenceCount;
+  unsigned MaxCandidateLen = 0;            // Length of the longest candidate.
 
-    // Anything we want to outline has to appear at least twice.
-    if (Occurrences < 2)
-      return 0u;
-
-    // Check if the last instruction in the sequence is a return.
-    MachineInstr *LastInstr =
-    Mapper.IntegerInstructionMap[EndVal];
-    assert(LastInstr && "Last instruction in sequence was unmapped!");
-
-    // The only way a terminator could be mapped as legal is if it was safe to
-    // tail call.
-    bool IsTailCall = LastInstr->isTerminator();
-    return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall);
-  };
-
-  MaxCandidateLen = ST.findCandidates(CandidateList, FunctionList, BenefitFn);
-
-  for (auto &OF : FunctionList)
-    OF.IsTailCall = Mapper.
-                    IntegerInstructionMap[OF.Sequence.back()]->isTerminator();
+  MaxCandidateLen =
+      findCandidates(ST, TII, Mapper, CandidateList, FunctionList);
 
   // Sort the candidates in decending order. This will simplify the outlining
   // process when we have to remove the candidates from the mapping by
   // allowing us to cut them out without keeping track of an offset.
-  std::stable_sort(CandidateList.begin(), CandidateList.end());
+  std::stable_sort(
+      CandidateList.begin(), CandidateList.end(),
+      [](const std::shared_ptr<Candidate> &LHS,
+         const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; });
 
   return MaxCandidateLen;
 }
 
 MachineFunction *
 MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
-  InstructionMapper &Mapper) {
+                                        InstructionMapper &Mapper) {
 
   // Create the function name. This should be unique. For now, just hash the
   // module name and include it in the function name plus the number of this
   // function.
   std::ostringstream NameStream;
-  NameStream << "OUTLINED_FUNCTION" << "_" << OF.Name;
+  NameStream << "OUTLINED_FUNCTION_" << OF.Name;
 
   // Create the function using an IR-level function.
   LLVMContext &C = M.getContext();
@@ -1119,7 +1221,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
   // Insert the new function into the module.
   MF.insert(MF.begin(), &MBB);
 
-  TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall);
+  TII.insertOutlinerPrologue(MBB, MF, OF.MInfo);
 
   // Copy over the instructions for the function using the integer mappings in
   // its sequence.
@@ -1134,21 +1236,19 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
     MBB.insert(MBB.end(), NewMI);
   }
 
-  TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall);
+  TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo);
 
   return &MF;
 }
 
-bool MachineOutliner::outline(Module &M,
-                              const ArrayRef<Candidate> &CandidateList,
-                              std::vector<OutlinedFunction> &FunctionList,
-                              InstructionMapper &Mapper) {
+bool MachineOutliner::outline(
+    Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList,
+    std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) {
 
   bool OutlinedSomething = false;
-
   // Replace the candidates with calls to their respective outlined functions.
-  for (const Candidate &C : CandidateList) {
-
+  for (const std::shared_ptr<Candidate> &Cptr : CandidateList) {
+    Candidate &C = *Cptr;
     // Was the candidate removed during pruneOverlaps?
     if (!C.InCandidateList)
       continue;
@@ -1157,14 +1257,15 @@ bool MachineOutliner::outline(Module &M,
     OutlinedFunction &OF = FunctionList[C.FunctionIdx];
 
     // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
-    if (OF.OccurrenceCount < 2 || OF.Benefit < 1)
+    if (OF.getBenefit() < 1)
       continue;
 
     // If not, then outline it.
-    assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
-    MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent();
-    MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx];
-    unsigned EndIdx = C.StartIdx + C.Len - 1;
+    assert(C.getStartIdx() < Mapper.InstrList.size() &&
+           "Candidate out of bounds!");
+    MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent();
+    MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()];
+    unsigned EndIdx = C.getEndIdx();
 
     assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
     MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
@@ -1175,6 +1276,37 @@ bool MachineOutliner::outline(Module &M,
     // Does this candidate have a function yet?
     if (!OF.MF) {
       OF.MF = createOutlinedFunction(M, OF, Mapper);
+      MachineBasicBlock *MBB = &*OF.MF->begin();
+
+      // Output a remark telling the user that an outlined function was created,
+      // and explaining where it came from.
+      MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
+      MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
+                                  MBB->findDebugLoc(MBB->begin()), MBB);
+      R << "Saved " << NV("OutliningBenefit", OF.getBenefit())
+        << " instructions by "
+        << "outlining " << NV("Length", OF.Sequence.size()) << " instructions "
+        << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
+        << " locations. "
+        << "(Found at: ";
+
+      // Tell the user the other places the candidate was found.
+      for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
+
+        // Skip over things that were pruned.
+        if (!OF.Candidates[i]->InCandidateList)
+          continue;
+
+        R << NV(
+            (Twine("StartLoc") + Twine(i)).str(),
+            Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc());
+        if (i != e - 1)
+          R << ", ";
+      }
+
+      R << ")";
+
+      MORE.emit(R);
       FunctionsCreated++;
     }
 
@@ -1183,8 +1315,8 @@ bool MachineOutliner::outline(Module &M,
     const TargetInstrInfo &TII = *STI.getInstrInfo();
 
     // Insert a call to the new function and erase the old sequence.
-    TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall);
-    StartIt = Mapper.InstrList[C.StartIdx];
+    TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo);
+    StartIt = Mapper.InstrList[C.getStartIdx()];
     MBB->erase(StartIt, EndIt);
 
     OutlinedSomething = true;
@@ -1193,9 +1325,7 @@ bool MachineOutliner::outline(Module &M,
     NumOutlined++;
   }
 
-  DEBUG (
-    dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";
-  );
+  DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
 
   return OutlinedSomething;
 }
@@ -1207,8 +1337,8 @@ bool MachineOutliner::runOnModule(Module &M) {
     return false;
 
   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-  const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin())
-                                      .getSubtarget();
+  const TargetSubtargetInfo &STI =
+      MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget();
   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
   const TargetInstrInfo *TII = STI.getInstrInfo();
 
@@ -1219,7 +1349,8 @@ bool MachineOutliner::runOnModule(Module &M) {
     MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
 
     // Is the function empty? Safe to outline from?
-    if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
+    if (F.empty() ||
+        !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs))
       continue;
 
     // If it is, look at each MachineBasicBlock in the function.
@@ -1236,7 +1367,7 @@ bool MachineOutliner::runOnModule(Module &M) {
 
   // Construct a suffix tree, use it to find candidates, and then outline them.
   SuffixTree ST(Mapper.UnsignedVec);
-  std::vector<Candidate> CandidateList;
+  std::vector<std::shared_ptr<Candidate>> CandidateList;
   std::vector<OutlinedFunction> FunctionList;
 
   // Find all of the outlining candidates.
@@ -1244,7 +1375,7 @@ bool MachineOutliner::runOnModule(Module &M) {
       buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
 
   // Remove candidates that overlap with other candidates.
-  pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
+  pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);
 
   // Outline each of the candidates and return true if something was outlined.
   return outline(M, CandidateList, FunctionList, Mapper);
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 19e9a50e2c43..18cb9af499a6 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -1,4 +1,4 @@
-//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===//
+//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -73,14 +73,13 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
@@ -90,19 +89,23 @@
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/PassAnalysisSupport.h"
-#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <climits>
@@ -111,6 +114,7 @@
 #include <functional>
 #include <iterator>
 #include <map>
+#include <memory>
 #include <tuple>
 #include <utility>
 #include <vector>
@@ -169,7 +173,6 @@ namespace {
 
 class NodeSet;
 class SMSchedule;
-class SwingSchedulerDAG;
 
 /// The main class in the implementation of the target independent
 /// software pipeliner pass.
@@ -185,6 +188,7 @@ public:
 #ifndef NDEBUG
   static int NumTries;
 #endif
+
   /// Cache the target analysis information about the loop.
   struct LoopInfo {
     MachineBasicBlock *TBB = nullptr;
@@ -196,6 +200,7 @@ public:
   LoopInfo LI;
 
   static char ID;
+
   MachinePipeliner() : MachineFunctionPass(ID) {
     initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
   }
@@ -222,9 +227,9 @@ private:
 class SwingSchedulerDAG : public ScheduleDAGInstrs {
   MachinePipeliner &Pass;
   /// The minimum initiation interval between iterations for this schedule.
-  unsigned MII;
+  unsigned MII = 0;
   /// Set to true if a valid pipelined schedule is found for the loop.
-  bool Scheduled;
+  bool Scheduled = false;
   MachineLoop &Loop;
   LiveIntervals &LIS;
   const RegisterClassInfo &RegClassInfo;
@@ -234,9 +239,10 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
   ScheduleDAGTopologicalSort Topo;
 
   struct NodeInfo {
-    int ASAP;
-    int ALAP;
-    NodeInfo() : ASAP(0), ALAP(0) {}
+    int ASAP = 0;
+    int ALAP = 0;
+
+    NodeInfo() = default;
   };
   /// Computed properties for each node in the graph.
   std::vector<NodeInfo> ScheduleInfo;
@@ -245,10 +251,10 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
   /// Computed node ordering for scheduling.
   SetVector<SUnit *> NodeOrder;
 
-  typedef SmallVector<NodeSet, 8> NodeSetType;
-  typedef DenseMap<unsigned, unsigned> ValueMapTy;
-  typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy;
-  typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy;
+  using NodeSetType = SmallVector<NodeSet, 8>;
+  using ValueMapTy = DenseMap<unsigned, unsigned>;
+  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
 
   /// Instructions to change when emitting the final schedule.
   DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
@@ -272,8 +278,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
 
   public:
     Circuits(std::vector<SUnit> &SUs)
-        : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()),
-          AdjK(SUs.size()) {}
+        : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
+
     /// Reset the data structures used in the circuit algorithm.
     void reset() {
       Stack.clear();
@@ -281,6 +287,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
       B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
       NumPaths = 0;
     }
+
     void createAdjacencyStructure(SwingSchedulerDAG *DAG);
     bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
     void unblock(int U);
@@ -289,9 +296,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
 public:
   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
                     const RegisterClassInfo &rci)
-      : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0),
-        Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci),
-        Topo(SUnits, &ExitSU) {
+      : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
+        RegClassInfo(rci), Topo(SUnits, &ExitSU) {
     P.MF->getSubtarget().getSMSMutations(Mutations);
   }
 
@@ -363,8 +369,9 @@ public:
   /// Set the Minimum Initiation Interval for this schedule attempt.
   void setMII(unsigned mii) { MII = mii; }
 
-  MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
-                                 bool UpdateDAG = false);
+  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
+
+  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
 
   /// Return the new base register that was stored away for the changed
   /// instruction.
@@ -455,7 +462,7 @@ private:
 /// that assigns a priority to the set.
 class NodeSet {
   SetVector<SUnit *> Nodes;
-  bool HasRecurrence;
+  bool HasRecurrence = false;
   unsigned RecMII = 0;
   int MaxMOV = 0;
   int MaxDepth = 0;
@@ -463,10 +470,9 @@ class NodeSet {
   SUnit *ExceedPressure = nullptr;
 
 public:
-  typedef SetVector<SUnit *>::const_iterator iterator;
-
-  NodeSet() : Nodes(), HasRecurrence(false) {}
+  using iterator = SetVector<SUnit *>::const_iterator;
 
+  NodeSet() = default;
   NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
 
   bool insert(SUnit *SU) { return Nodes.insert(SU); }
@@ -581,13 +587,13 @@ private:
 
   /// Keep track of the first cycle value in the schedule.  It starts
   /// as zero, but the algorithm allows negative values.
-  int FirstCycle;
+  int FirstCycle = 0;
 
   /// Keep track of the last cycle value in the schedule.
-  int LastCycle;
+  int LastCycle = 0;
 
   /// The initiation interval (II) for the schedule.
-  int InitiationInterval;
+  int InitiationInterval = 0;
 
   /// Target machine information.
   const TargetSubtargetInfo &ST;
@@ -600,11 +606,7 @@ private:
 public:
   SMSchedule(MachineFunction *mf)
       : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
-        Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {
-    FirstCycle = 0;
-    LastCycle = 0;
-    InitiationInterval = 0;
-  }
+        Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
 
   void reset() {
     ScheduledInstrs.clear();
@@ -638,9 +640,9 @@ public:
   bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
 
   /// Iterators for the cycle to instruction map.
-  typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator;
-  typedef DenseMap<int, std::deque<SUnit *>>::const_iterator
-      const_sched_iterator;
+  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
+  using const_sched_iterator =
+      DenseMap<int, std::deque<SUnit *>>::const_iterator;
 
   /// Return true if the instruction is scheduled at the specified stage.
   bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
@@ -715,6 +717,7 @@ char MachinePipeliner::ID = 0;
 int MachinePipeliner::NumTries = 0;
 #endif
 char &llvm::MachinePipelinerID = MachinePipeliner::ID;
+
 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
                       "Modulo Software Pipelining", false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -726,13 +729,13 @@ INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
 
 /// The "main" function for implementing Swing Modulo Scheduling.
 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
-  if (skipFunction(*mf.getFunction()))
+  if (skipFunction(mf.getFunction()))
     return false;
 
   if (!EnableSWP)
     return false;
 
-  if (mf.getFunction()->getAttributes().hasAttribute(
+  if (mf.getFunction().getAttributes().hasAttribute(
           AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
       !EnableSWPOptSize.getPosition())
     return false;
@@ -1256,6 +1259,8 @@ struct FuncUnitSorter {
   const InstrItineraryData *InstrItins;
   DenseMap<unsigned, unsigned> Resources;
 
+  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+
   // Compute the number of functional unit alternatives needed
   // at each stage, and take the minimum value. We prioritize the
   // instructions by the least number of choices first.
@@ -1291,7 +1296,6 @@ struct FuncUnitSorter {
     }
   }
 
-  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
   /// Return true if IS1 has less priority than IS2.
   bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
     unsigned F1 = 0, F2 = 0;
@@ -1384,7 +1388,7 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
   unsigned RecMII = 0;
 
   for (NodeSet &Nodes : NodeSets) {
-    if (Nodes.size() == 0)
+    if (Nodes.empty())
       continue;
 
     unsigned Delay = Nodes.size() - 1;
@@ -1554,7 +1558,6 @@ static bool ignoreDependence(const SDep &D, bool isPred) {
 ///  D - Depth of each node.
 ///  H - Height of each node.
 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
-
   ScheduleInfo.resize(SUnits.size());
 
   DEBUG({
@@ -1651,7 +1654,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
         Preds.insert(IS->getSUnit());
     }
   }
-  return Preds.size() > 0;
+  return !Preds.empty();
 }
 
 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
@@ -1683,7 +1686,7 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
         Succs.insert(PI->getSUnit());
     }
   }
-  return Succs.size() > 0;
+  return !Succs.empty();
 }
 
 /// Return true if there is a path from the specified node to any of the nodes
@@ -1868,7 +1871,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
         Visited.clear();
         computePath(NI, Path, NodesAdded, I, Visited);
       }
-      if (Path.size() > 0)
+      if (!Path.empty())
         I.insert(Path.begin(), Path.end());
     }
     // Add the nodes from the previous node set to the current node set.
@@ -1879,7 +1882,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
         Visited.clear();
         computePath(NI, Path, I, NodesAdded, Visited);
       }
-      if (Path.size() > 0)
+      if (!Path.empty())
         I.insert(Path.begin(), Path.end());
     }
     NodesAdded.insert(I.begin(), I.end());
@@ -1892,7 +1895,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
   if (succ_L(NodesAdded, N))
     for (SUnit *I : N)
       addConnectedNodes(I, NewSet, NodesAdded);
-  if (NewSet.size() > 0)
+  if (!NewSet.empty())
     NodeSets.push_back(NewSet);
 
   // Create a new node set with the connected nodes of any predecessor of a node
@@ -1901,7 +1904,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
   if (pred_L(NodesAdded, N))
     for (SUnit *I : N)
       addConnectedNodes(I, NewSet, NodesAdded);
-  if (NewSet.size() > 0)
+  if (!NewSet.empty())
     NodeSets.push_back(NewSet);
 
   // Create new nodes sets with the connected nodes any any remaining node that
@@ -1911,7 +1914,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
     if (NodesAdded.count(SU) == 0) {
       NewSet.clear();
       addConnectedNodes(SU, NewSet, NodesAdded);
-      if (NewSet.size() > 0)
+      if (!NewSet.empty())
         NodeSets.push_back(NewSet);
     }
   }
@@ -1976,7 +1979,7 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
     for (NodeSetType::iterator J = I + 1; J != E;) {
       J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
 
-      if (J->size() == 0) {
+      if (J->empty()) {
         NodeSets.erase(J);
         E = NodeSets.end();
       } else {
@@ -2147,8 +2150,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
 /// Process the nodes in the computed order and create the pipelined schedule
 /// of the instructions, if possible. Return true if a schedule is found.
 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
-
-  if (NodeOrder.size() == 0)
+  if (NodeOrder.empty())
     return false;
 
   bool scheduleFound = false;
@@ -2325,7 +2327,7 @@ void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
                                        ValueMapTy *VRMap,
                                        MBBVectorTy &PrologBBs) {
   MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
-  assert(PreheaderBB != NULL &&
+  assert(PreheaderBB != nullptr &&
          "Need to add code to handle loops w/o preheader");
   MachineBasicBlock *PredBB = PreheaderBB;
   InstrMapTy InstrMap;
@@ -3352,7 +3354,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
   unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
 
   // Look for the Phi instruction.
-  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
   MachineInstr *Phi = MRI.getVRegDef(BaseReg);
   if (!Phi || !Phi->isPHI())
     return false;
@@ -3389,9 +3391,8 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
 
 /// Apply changes to the instruction if needed. The changes are need
 /// to improve the scheduling and depend up on the final schedule.
-MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
-                                                  SMSchedule &Schedule,
-                                                  bool UpdateDAG) {
+void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+                                         SMSchedule &Schedule) {
   SUnit *SU = getSUnit(MI);
   DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
       InstrChanges.find(SU);
@@ -3399,7 +3400,7 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
     std::pair<unsigned, int64_t> RegAndOffset = It->second;
     unsigned BasePos, OffsetPos;
     if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
-      return nullptr;
+      return;
     unsigned BaseReg = MI->getOperand(BasePos).getReg();
     MachineInstr *LoopDef = findDefInLoop(BaseReg);
     int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
@@ -3417,15 +3418,11 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
       int64_t NewOffset =
           MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
       NewMI->getOperand(OffsetPos).setImm(NewOffset);
-      if (UpdateDAG) {
-        SU->setInstr(NewMI);
-        MISUnitMap[NewMI] = SU;
-      }
+      SU->setInstr(NewMI);
+      MISUnitMap[NewMI] = SU;
       NewMIs.insert(NewMI);
-      return NewMI;
     }
   }
-  return nullptr;
 }
 
 /// Return true for an order dependence that is loop carried potentially.
@@ -3871,6 +3868,58 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
   return true;
 }
 
+/// Attempt to fix the degenerate cases when the instruction serialization
+/// causes the register lifetimes to overlap. For example,
+///   p' = store_pi(p, b)
+///      = load p, offset
+/// In this case p and p' overlap, which means that two registers are needed.
+/// Instead, this function changes the load to use p' and updates the offset.
+void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
+  unsigned OverlapReg = 0;
+  unsigned NewBaseReg = 0;
+  for (SUnit *SU : Instrs) {
+    MachineInstr *MI = SU->getInstr();
+    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      // Look for an instruction that uses p. The instruction occurs in the
+      // same cycle but occurs later in the serialized order.
+      if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
+        // Check that the instruction appears in the InstrChanges structure,
+        // which contains instructions that can have the offset updated.
+        DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+          InstrChanges.find(SU);
+        if (It != InstrChanges.end()) {
+          unsigned BasePos, OffsetPos;
+          // Update the base register and adjust the offset.
+          if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
+            MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+            NewMI->getOperand(BasePos).setReg(NewBaseReg);
+            int64_t NewOffset =
+                MI->getOperand(OffsetPos).getImm() - It->second.second;
+            NewMI->getOperand(OffsetPos).setImm(NewOffset);
+            SU->setInstr(NewMI);
+            MISUnitMap[NewMI] = SU;
+            NewMIs.insert(NewMI);
+          }
+        }
+        OverlapReg = 0;
+        NewBaseReg = 0;
+        break;
+      }
+      // Look for an instruction of the form p' = op(p), which uses and defines
+      // two virtual registers that get allocated to the same physical register.
+      unsigned TiedUseIdx = 0;
+      if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
+        // OverlapReg is p in the example above.
+        OverlapReg = MI->getOperand(TiedUseIdx).getReg();
+        // NewBaseReg is p' in the example above.
+        NewBaseReg = MI->getOperand(i).getReg();
+        break;
+      }
+    }
+  }
+}
+
 /// After the schedule has been formed, call this function to combine
 /// the instructions from the different stages/cycles.  That is, this
 /// function creates a schedule that represents a single iteration.
@@ -3931,7 +3980,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
   // map. We need to use the new registers to create the correct order.
   for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
     SUnit *SU = &SSD->SUnits[i];
-    SSD->applyInstrChange(SU->getInstr(), *this, true);
+    SSD->applyInstrChange(SU->getInstr(), *this);
   }
 
   // Reorder the instructions in each cycle to fix and improve the
@@ -3955,11 +4004,13 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
     // Replace the old order with the new order.
     cycleInstrs.swap(newOrderZC);
     cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+    SSD->fixupRegisterOverlaps(cycleInstrs);
   }
 
   DEBUG(dump(););
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// Print the schedule information to the given output.
 void SMSchedule::print(raw_ostream &os) const {
   // Iterate over each cycle.
@@ -3975,7 +4026,6 @@ void SMSchedule::print(raw_ostream &os) const {
   }
 }
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// Utility function used for debugging to print the schedule.
 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
 #endif
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index be06053f0040..b82ab02a6e6c 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -19,6 +19,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
@@ -28,9 +31,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 
 using namespace llvm;
@@ -183,7 +183,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
     MachineOperand *MO = &M;
     MachineInstr *MI = MO->getParent();
     if (!MI) {
-      errs() << PrintReg(Reg, getTargetRegisterInfo())
+      errs() << printReg(Reg, getTargetRegisterInfo())
              << " use list MachineOperand " << MO
              << " has no parent instruction.\n";
       Valid = false;
@@ -192,19 +192,19 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
     MachineOperand *MO0 = &MI->getOperand(0);
     unsigned NumOps = MI->getNumOperands();
     if (!(MO >= MO0 && MO < MO0+NumOps)) {
-      errs() << PrintReg(Reg, getTargetRegisterInfo())
+      errs() << printReg(Reg, getTargetRegisterInfo())
              << " use list MachineOperand " << MO
              << " doesn't belong to parent MI: " << *MI;
       Valid = false;
     }
     if (!MO->isReg()) {
-      errs() << PrintReg(Reg, getTargetRegisterInfo())
+      errs() << printReg(Reg, getTargetRegisterInfo())
              << " MachineOperand " << MO << ": " << *MO
              << " is not a register\n";
       Valid = false;
     }
     if (MO->getReg() != Reg) {
-      errs() << PrintReg(Reg, getTargetRegisterInfo())
+      errs() << printReg(Reg, getTargetRegisterInfo())
              << " use-list MachineOperand " << MO << ": "
              << *MO << " is the wrong register\n";
       Valid = false;
@@ -428,8 +428,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
   // Emit the copies into the top of the block.
   for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
     if (LiveIns[i].second) {
-      if (use_empty(LiveIns[i].second)) {
-        // The livein has no uses. Drop it.
+      if (use_nodbg_empty(LiveIns[i].second)) {
+        // The livein has no non-dbg uses. Drop it.
         //
         // It would be preferable to have isel avoid creating live-in
         // records for unused arguments in the first place, but it's
@@ -487,6 +487,13 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
   return true;
 }
 
+bool
+MachineRegisterInfo::isCallerPreservedOrConstPhysReg(unsigned PhysReg) const {
+  const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+  return isConstantPhysReg(PhysReg) ||
+      TRI->isCallerPreservedPhysReg(PhysReg, *MF);
+}
+
 /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
 /// specified register as undefined which causes the DBG_VALUE to be
 /// deleted during LiveDebugVariables analysis.
@@ -524,7 +531,7 @@ static bool isNoReturnDef(const MachineOperand &MO) {
   const MachineFunction &MF = *MBB.getParent();
   // We need to keep correct unwind information even if the function will
   // not return, since the runtime may need it.
-  if (MF.getFunction()->hasFnAttribute(Attribute::UWTable))
+  if (MF.getFunction().hasFnAttribute(Attribute::UWTable))
     return false;
   const Function *Called = getCalledFunction(MI);
   return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) ||
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index e9b47559309f..36844e9fb30a 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -15,31 +15,36 @@
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "machine-ssaupdater"
 
-typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+using AvailableValsTy = DenseMap<MachineBasicBlock *, unsigned>;
+
 static AvailableValsTy &getAvailableVals(void *AV) {
   return *static_cast<AvailableValsTy*>(AV);
 }
 
 MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
                                      SmallVectorImpl<MachineInstr*> *NewPHI)
-  : AV(nullptr), InsertedPHIs(NewPHI) {
-  TII = MF.getSubtarget().getInstrInfo();
-  MRI = &MF.getRegInfo();
-}
+  : InsertedPHIs(NewPHI), TII(MF.getSubtarget().getInstrInfo()),
+    MRI(&MF.getRegInfo()) {}
 
 MachineSSAUpdater::~MachineSSAUpdater() {
   delete static_cast<AvailableValsTy*>(AV);
@@ -77,7 +82,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
 
 static
 unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
-        SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) {
+        SmallVectorImpl<std::pair<MachineBasicBlock *, unsigned>> &PredValues) {
   if (BB->empty())
     return 0;
 
@@ -136,7 +141,6 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
 /// their respective blocks.  However, the use of X happens in the *middle* of
 /// a block.  Because of this, we need to insert a new PHI node in SomeBB to
 /// merge the appropriate values, and this value isn't live out of the block.
-///
 unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
   // If there is no definition of the renamed variable in this block, just use
   // GetValueAtEndOfBlock to do our work.
@@ -233,14 +237,15 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
 /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
 /// template, specialized for MachineSSAUpdater.
 namespace llvm {
+
 template<>
 class SSAUpdaterTraits<MachineSSAUpdater> {
 public:
-  typedef MachineBasicBlock BlkT;
-  typedef unsigned ValT;
-  typedef MachineInstr PhiT;
+  using BlkT = MachineBasicBlock;
+  using ValT = unsigned;
+  using PhiT = MachineInstr;
+  using BlkSucc_iterator = MachineBasicBlock::succ_iterator;
 
-  typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
   static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
   static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
 
@@ -259,12 +264,16 @@ public:
     PHI_iterator &operator++() { idx += 2; return *this; } 
     bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
     bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
     unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+
     MachineBasicBlock *getIncomingBlock() {
       return PHI->getOperand(idx+1).getMBB();
     }
   };
+
   static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+
   static inline PHI_iterator PHI_end(PhiT *PHI) {
     return PHI_iterator(PHI, true);
   }
@@ -309,7 +318,6 @@ public:
   }
 
   /// InstrIsPHI - Check if an instruction is a PHI.
-  ///
   static MachineInstr *InstrIsPHI(MachineInstr *I) {
     if (I && I->isPHI())
       return I;
@@ -338,7 +346,7 @@ public:
   }
 };
 
-} // End llvm namespace
+} // end namespace llvm
 
 /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
 /// for the specified BB and if so, return it.  If not, construct SSA form by
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index eaba9a58557c..e15eb658a05c 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -22,7 +22,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -42,8 +42,12 @@
 #include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
@@ -52,10 +56,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -98,7 +98,7 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
   cl::desc("Only schedule this function"));
 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
-  cl::desc("Only schedule this MBB#"));
+                                        cl::desc("Only schedule this MBB#"));
 #else
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
@@ -200,8 +200,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                     "Machine Instruction Scheduler", false, false)
 
-MachineScheduler::MachineScheduler()
-: MachineSchedulerBase(ID) {
+MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 }
 
@@ -225,8 +224,7 @@ char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                 "PostRA Machine Instruction Scheduler", false, false)
 
-PostMachineScheduler::PostMachineScheduler()
-: MachineSchedulerBase(ID) {
+PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
 }
 
@@ -353,7 +351,7 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 /// general bias against block splitting purely for implementation simplicity.
 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
-  if (skipFunction(*mf.getFunction()))
+  if (skipFunction(mf.getFunction()))
     return false;
 
   if (EnableMachineSched.getNumOccurrences()) {
@@ -391,7 +389,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 }
 
 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
-  if (skipFunction(*mf.getFunction()))
+  if (skipFunction(mf.getFunction()))
     return false;
 
   if (EnablePostRAMachineSched.getNumOccurrences()) {
@@ -405,6 +403,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 
   // Initialize the context of the pass.
   MF = &mf;
+  MLI = &getAnalysis<MachineLoopInfo>();
   PassConfig = &getAnalysis<TargetPassConfig>();
 
   if (VerifyScheduling)
@@ -437,11 +436,67 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
 }
 
+/// A region of an MBB for scheduling.
+namespace {
+struct SchedRegion {
+  /// RegionBegin is the first instruction in the scheduling region, and
+  /// RegionEnd is either MBB->end() or the scheduling boundary after the
+  /// last instruction in the scheduling region. These iterators cannot refer
+  /// to instructions outside of the identified scheduling region because
+  /// those may be reordered before scheduling this region.
+  MachineBasicBlock::iterator RegionBegin;
+  MachineBasicBlock::iterator RegionEnd;
+  unsigned NumRegionInstrs;
+
+  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
+              unsigned N) :
+    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
+};
+} // end anonymous namespace
+
+using MBBRegionsVector = SmallVector<SchedRegion, 16>;
+
+static void
+getSchedRegions(MachineBasicBlock *MBB,
+                MBBRegionsVector &Regions,
+                bool RegionsTopDown) {
+  MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  MachineBasicBlock::iterator I = nullptr;
+  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+      RegionEnd != MBB->begin(); RegionEnd = I) {
+
+    // Avoid decrementing RegionEnd for blocks with no terminator.
+    if (RegionEnd != MBB->end() ||
+        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
+      --RegionEnd;
+    }
+
+    // The next region starts above the previous region. Look backward in the
+    // instruction stream until we find the nearest boundary.
+    unsigned NumRegionInstrs = 0;
+    I = RegionEnd;
+    for (;I != MBB->begin(); --I) {
+      MachineInstr &MI = *std::prev(I);
+      if (isSchedBoundary(&MI, &*MBB, MF, TII))
+        break;
+      if (!MI.isDebugValue())
+        // MBB::size() uses instr_iterator to count. Here we need a bundle to
+        // count as a single instruction.
+        ++NumRegionInstrs;
+    }
+
+    Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
+  }
+
+  if (RegionsTopDown)
+    std::reverse(Regions.begin(), Regions.end());
+}
+
 /// Main driver for both MachineScheduler and PostMachineScheduler.
 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                            bool FixKillFlags) {
-  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
   // Visit all machine basic blocks.
   //
   // TODO: Visit blocks in global postorder or postorder within the bottom-up
@@ -459,39 +514,28 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
       continue;
 #endif
 
-    // Break the block into scheduling regions [I, RegionEnd), and schedule each
-    // region as soon as it is discovered. RegionEnd points the scheduling
-    // boundary at the bottom of the region. The DAG does not include RegionEnd,
-    // but the region does (i.e. the next RegionEnd is above the previous
-    // RegionBegin). If the current block has no terminator then RegionEnd ==
-    // MBB->end() for the bottom region.
+    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
+    // points to the scheduling boundary at the bottom of the region. The DAG
+    // does not include RegionEnd, but the region does (i.e. the next
+    // RegionEnd is above the previous RegionBegin). If the current block has
+    // no terminator then RegionEnd == MBB->end() for the bottom region.
+    //
+    // All the regions of MBB are first found and stored in MBBRegions, which
+    // will be processed (MBB) top-down if initialized with true.
     //
     // The Scheduler may insert instructions during either schedule() or
     // exitRegion(), even for empty regions. So the local iterators 'I' and
-    // 'RegionEnd' are invalid across these calls.
-    //
-    // MBB::size() uses instr_iterator to count. Here we need a bundle to count
-    // as a single instruction.
-    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
-
-      // Avoid decrementing RegionEnd for blocks with no terminator.
-      if (RegionEnd != MBB->end() ||
-          isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
-        --RegionEnd;
-      }
+    // 'RegionEnd' are invalid across these calls. Instructions must not be
+    // added to other regions than the current one without updating MBBRegions.
+
+    MBBRegionsVector MBBRegions;
+    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
+    for (MBBRegionsVector::iterator R = MBBRegions.begin();
+         R != MBBRegions.end(); ++R) {
+      MachineBasicBlock::iterator I = R->RegionBegin;
+      MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
+      unsigned NumRegionInstrs = R->NumRegionInstrs;
 
-      // The next region starts above the previous region. Look backward in the
-      // instruction stream until we find the nearest boundary.
-      unsigned NumRegionInstrs = 0;
-      MachineBasicBlock::iterator I = RegionEnd;
-      for (; I != MBB->begin(); --I) {
-        MachineInstr &MI = *std::prev(I);
-        if (isSchedBoundary(&MI, &*MBB, MF, TII))
-          break;
-        if (!MI.isDebugValue())
-          ++NumRegionInstrs;
-      }
       // Notify the scheduler of the region, even if we may skip scheduling
       // it. Perhaps it still needs to be bundled.
       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
@@ -504,28 +548,23 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
         continue;
       }
       DEBUG(dbgs() << "********** MI Scheduling **********\n");
-      DEBUG(dbgs() << MF->getName()
-            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
-            << "\n  From: " << *I << "    To: ";
+      DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " "
+                   << MBB->getName() << "\n  From: " << *I << "    To: ";
             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
       if (DumpCriticalPathLength) {
         errs() << MF->getName();
-        errs() << ":BB# " << MBB->getNumber();
+        errs() << ":%bb. " << MBB->getNumber();
         errs() << " " << MBB->getName() << " \n";
       }
 
       // Schedule a region: possibly reorder instructions.
-      // This invalidates 'RegionEnd' and 'I'.
+      // This invalidates the original region iterators.
       Scheduler.schedule();
 
       // Close the current region.
       Scheduler.exitRegion();
-
-      // Scheduling has invalidated the current iterator 'I'. Ask the
-      // scheduler for the top of it's scheduled region.
-      RegionEnd = Scheduler.begin();
     }
     Scheduler.finishBlock();
     // FIXME: Ideally, no further passes should rely on kill flags. However,
@@ -650,6 +689,16 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
     releasePred(SU, &Pred);
 }
 
+void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
+  ScheduleDAGInstrs::startBlock(bb);
+  SchedImpl->enterMBB(bb);
+}
+
+void ScheduleDAGMI::finishBlock() {
+  SchedImpl->leaveMBB();
+  ScheduleDAGInstrs::finishBlock();
+}
+
 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 /// the region, including the boundary itself and single-instruction regions
@@ -773,11 +822,11 @@ void ScheduleDAGMI::schedule() {
   placeDebugValues();
 
   DEBUG({
-      unsigned BBNum = begin()->getParent()->getNumber();
-      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
-      dumpSchedule();
-      dbgs() << '\n';
-    });
+    dbgs() << "*** Final schedule for "
+           << printMBBReference(*begin()->getParent()) << " ***\n";
+    dumpSchedule();
+    dbgs() << '\n';
+  });
 }
 
 /// Apply each ScheduleDAGMutation step in order.
@@ -1004,7 +1053,10 @@ void ScheduleDAGMILive::initRegPressure() {
     dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
   );
 
-  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+  assert((BotRPTracker.getPos() == RegionEnd ||
+          (RegionEnd->isDebugValue() &&
+           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
+         "Can't find the region bottom");
 
   // Cache the list of excess pressure sets in this region. This will also track
   // the max pressure in the scheduled code for these sets.
@@ -1080,7 +1132,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
         PDiff.addPressureChange(Reg, Decrement, &MRI);
         DEBUG(
           dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
-                 << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+                 << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
                  << ' ' << *SU.getInstr();
           dbgs() << "              to ";
           PDiff.dump(*TRI);
@@ -1088,7 +1140,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
       }
     } else {
       assert(P.LaneMask.any());
-      DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+      DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
       // This may be called before CurrentBottom has been initialized. However,
       // BotRPTracker must have a valid position. We want the value live into the
       // instruction or live out of the block, so ask for the previous
@@ -1211,11 +1263,11 @@ void ScheduleDAGMILive::schedule() {
   placeDebugValues();
 
   DEBUG({
-      unsigned BBNum = begin()->getParent()->getNumber();
-      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
-      dumpSchedule();
-      dbgs() << '\n';
-    });
+    dbgs() << "*** Final schedule for "
+           << printMBBReference(*begin()->getParent()) << " ***\n";
+    dumpSchedule();
+    dbgs() << '\n';
+  });
 }
 
 /// Build the DAG and setup three register pressure trackers.
@@ -1410,7 +1462,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
         RegOpers.detectDeadDefs(*MI, *LIS);
       }
 
-      BotRPTracker.recedeSkipDebugValues();
+      if (BotRPTracker.getPos() != CurrentBottom)
+        BotRPTracker.recedeSkipDebugValues();
       SmallVector<RegisterMaskPair, 8> LiveUses;
       BotRPTracker.recede(RegOpers, &LiveUses);
       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
@@ -1511,14 +1564,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
   std::sort(MemOpRecords.begin(), MemOpRecords.end());
   unsigned ClusterLength = 1;
   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
-    if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
-      ClusterLength = 1;
-      continue;
-    }
-
     SUnit *SUa = MemOpRecords[Idx].SU;
     SUnit *SUb = MemOpRecords[Idx+1].SU;
-    if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+    if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
+                                 *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
                                  ClusterLength) &&
         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
       DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
@@ -1541,7 +1590,6 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
 
 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
-
   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
 
   // Map DAG NodeNum to store chain ID.
@@ -1587,6 +1635,7 @@ namespace {
 class CopyConstrain : public ScheduleDAGMutation {
   // Transient state.
   SlotIndex RegionBeginIdx;
+
   // RegionEndIdx is the slot index of the last non-debug instruction in the
   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
   SlotIndex RegionEndIdx;
@@ -1785,6 +1834,13 @@ static const unsigned InvalidCycle = ~0U;
 
 SchedBoundary::~SchedBoundary() { delete HazardRec; }
 
+/// Given a Count of resource usage and a Latency value, return true if a
+/// SchedBoundary becomes resource limited.
+static bool checkResourceLimit(unsigned LFactor, unsigned Count,
+                               unsigned Latency) {
+  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
+}
+
 void SchedBoundary::reset() {
   // A new HazardRec is created for each DAG and owned by SchedBoundary.
   // Destroying and reconstructing it is very expensive though. So keep
@@ -1916,16 +1972,18 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
 
   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
-    for (TargetSchedModel::ProcResIter
-           PI = SchedModel->getWriteProcResBegin(SC),
-           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-      unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+    for (const MCWriteProcResEntry &PE :
+          make_range(SchedModel->getWriteProcResBegin(SC),
+                     SchedModel->getWriteProcResEnd(SC))) {
+      unsigned ResIdx = PE.ProcResourceIdx;
+      unsigned Cycles = PE.Cycles;
+      unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
       if (NRCycle > CurrCycle) {
 #ifndef NDEBUG
-        MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+        MaxObservedStall = std::max(Cycles, MaxObservedStall);
 #endif
         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
-              << SchedModel->getResourceName(PI->ProcResourceIdx)
+              << SchedModel->getResourceName(ResIdx)
               << "=" << NRCycle << "c\n");
         return true;
       }
@@ -2037,10 +2095,9 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
     }
   }
   CheckPending = true;
-  unsigned LFactor = SchedModel->getLatencyFactor();
   IsResourceLimited =
-    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
-    > (int)LFactor;
+      checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
+                         getScheduledLatency());
 
   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
 }
@@ -2193,16 +2250,15 @@ void SchedBoundary::bumpNode(SUnit *SU) {
           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
   }
   // If we stall for any reason, bump the cycle.
-  if (NextCycle > CurrCycle) {
+  if (NextCycle > CurrCycle)
     bumpCycle(NextCycle);
-  } else {
+  else
     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
     // resource limited. If a stall occurred, bumpCycle does this.
-    unsigned LFactor = SchedModel->getLatencyFactor();
     IsResourceLimited =
-      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
-      > (int)LFactor;
-  }
+        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
+                           getScheduledLatency());
+
   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
   // one cycle.  Since we commonly reach the max MOps here, opportunistically
@@ -2317,7 +2373,7 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
     ResCount = getResourceCount(ZoneCritResIdx);
   } else {
     ResFactor = SchedModel->getMicroOpFactor();
-    ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
+    ResCount = RetiredMOps * ResFactor;
   }
   unsigned LFactor = SchedModel->getLatencyFactor();
   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
@@ -2387,10 +2443,10 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
 
   bool OtherResLimited = false;
-  if (SchedModel->hasInstrSchedModel()) {
-    unsigned LFactor = SchedModel->getLatencyFactor();
-    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
-  }
+  if (SchedModel->hasInstrSchedModel())
+    OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
+                                         OtherCount, RemLatency);
+
   // Schedule aggressively for latency in PostRA mode. We don't check for
   // acyclic latency during PostRA, and highly out-of-order processors will
   // skip PostRA scheduling.
@@ -2605,7 +2661,7 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
                                   MachineBasicBlock::iterator End,
                                   unsigned NumRegionInstrs) {
-  const MachineFunction &MF = *Begin->getParent()->getParent();
+  const MachineFunction &MF = *Begin->getMF();
   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
 
   // Avoid setting up the register pressure tracker for small regions to save
@@ -3199,7 +3255,6 @@ void PostGenericScheduler::registerRoots() {
 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
                                         SchedCandidate &TryCand) {
-
   // Initialize the candidate if needed.
   if (!Cand.isValid()) {
     TryCand.Reason = NodeOrder;
@@ -3438,6 +3493,7 @@ class InstructionShuffler : public MachineSchedStrategy {
   // instructions to be scheduled first.
   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
     TopQ;
+
   // When scheduling bottom-up, use greater-than as the queue priority.
   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
     BottomQ;
@@ -3554,6 +3610,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
       SS << " I:" << DFS->getNumInstrs(SU);
     return SS.str();
   }
+
   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
     return G->getGraphNodeLabel(SU);
   }
@@ -3577,7 +3634,6 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
 
 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
 /// rendered using 'dot'.
-///
 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
 #ifndef NDEBUG
   ViewGraph(this, Name, false, Title);
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 79e3fea3f90c..bedfdd84b1ca 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -1,4 +1,4 @@
-//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -32,14 +33,17 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -93,12 +97,12 @@ namespace {
     // Remember which edges we are about to split.
     // This is different from CEBCandidates since those edges
     // will be split.
-    SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit;
+    SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
 
     SparseBitVector<> RegsToClearKillFlags;
 
-    typedef std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>
-        AllSuccsCache;
+    using AllSuccsCache =
+        std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
 
   public:
     static char ID; // Pass identification
@@ -133,6 +137,7 @@ namespace {
     bool isWorthBreakingCriticalEdge(MachineInstr &MI,
                                      MachineBasicBlock *From,
                                      MachineBasicBlock *To);
+
     /// \brief Postpone the splitting of the given critical
     /// edge (\p From, \p To).
     ///
@@ -150,6 +155,7 @@ namespace {
                                    MachineBasicBlock *To,
                                    bool BreakPHIEdge);
     bool SinkInstruction(MachineInstr &MI, bool &SawStore,
+
                          AllSuccsCache &AllSuccessors);
     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
                                  MachineBasicBlock *DefMBB,
@@ -172,7 +178,9 @@ namespace {
 } // end anonymous namespace
 
 char MachineSinking::ID = 0;
+
 char &llvm::MachineSinkingID = MachineSinking::ID;
+
 INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                       "Machine code sinking", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
@@ -236,17 +244,17 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
   // into and they are all PHI nodes. In this case, machine-sink must break
   // the critical edge first. e.g.
   //
-  // BB#1: derived from LLVM BB %bb4.preheader
-  //   Predecessors according to CFG: BB#0
+  // %bb.1: derived from LLVM BB %bb4.preheader
+  //   Predecessors according to CFG: %bb.0
   //     ...
-  //     %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+  //     %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags
   //     ...
-  //     JE_4 <BB#37>, %EFLAGS<imp-use>
-  //   Successors according to CFG: BB#37 BB#2
+  //     JE_4 <%bb.37>, implicit %eflags
+  //   Successors according to CFG: %bb.37 %bb.2
   //
-  // BB#2: derived from LLVM BB %bb.nph
-  //   Predecessors according to CFG: BB#0 BB#1
-  //     %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+  // %bb.2: derived from LLVM BB %bb.nph
+  //   Predecessors according to CFG: %bb.0 %bb.1
+  //     %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
   BreakPHIEdge = true;
   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
     MachineInstr *UseInst = MO.getParent();
@@ -284,7 +292,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
 }
 
 bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   DEBUG(dbgs() << "******** Machine Sinking ********\n");
@@ -314,10 +322,10 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
     for (auto &Pair : ToSplit) {
       auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
       if (NewSucc != nullptr) {
-        DEBUG(dbgs() << " *** Splitting critical edge:"
-              " BB#" << Pair.first->getNumber()
-              << " -- BB#" << NewSucc->getNumber()
-              << " -- BB#" << Pair.second->getNumber() << '\n');
+        DEBUG(dbgs() << " *** Splitting critical edge: "
+                     << printMBBReference(*Pair.first) << " -- "
+                     << printMBBReference(*NewSucc) << " -- "
+                     << printMBBReference(*Pair.second) << '\n');
         MadeChange = true;
         ++NumSplit;
       } else
@@ -453,33 +461,33 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
   // It's not always legal to break critical edges and sink the computation
   // to the edge.
   //
-  // BB#1:
+  // %bb.1:
   // v1024
-  // Beq BB#3
+  // Beq %bb.3
   // <fallthrough>
-  // BB#2:
+  // %bb.2:
   // ... no uses of v1024
   // <fallthrough>
-  // BB#3:
+  // %bb.3:
   // ...
   //       = v1024
   //
-  // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
   //
-  // BB#1:
+  // %bb.1:
   // ...
-  // Bne BB#2
-  // BB#4:
+  // Bne %bb.2
+  // %bb.4:
   // v1024 =
-  // B BB#3
-  // BB#2:
+  // B %bb.3
+  // %bb.2:
   // ... no uses of v1024
   // <fallthrough>
-  // BB#3:
+  // %bb.3:
   // ...
   //       = v1024
   //
-  // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+  // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
   // flow. We need to ensure the new basic block where the computation is
   // sunk to dominates all the uses.
   // It's only legal to break critical edge and sink the computation to the
@@ -570,7 +578,6 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
 SmallVector<MachineBasicBlock *, 4> &
 MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                        AllSuccsCache &AllSuccessors) const {
-
   // Do we have the sorted successors in cache ?
   auto Succs = AllSuccessors.find(MBB);
   if (Succs != AllSuccessors.end())
@@ -711,7 +718,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
 static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                              const TargetInstrInfo *TII,
                                              const TargetRegisterInfo *TRI) {
-  typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
 
   auto *MBB = MI.getParent();
   if (MBB->pred_size() != 1)
@@ -784,7 +791,6 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   if (!SuccToSinkTo)
     return false;
 
-
   // If the instruction to move defines a dead physical register which is live
   // when leaving the basic block, don't move it because it could turn into a
   // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
@@ -863,11 +869,20 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   SmallVector<MachineInstr *, 2> DbgValuesToSink;
   collectDebugValues(MI, DbgValuesToSink);
 
+  // Merge or erase debug location to ensure consistent stepping in profilers
+  // and debuggers.
+  if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end())
+    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
+                                                 InsertPos->getDebugLoc()));
+  else
+    MI.setDebugLoc(DebugLoc());
+
+
   // Move the instruction.
   SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                        ++MachineBasicBlock::iterator(MI));
 
-  // Move debug values.
+  // Move previously adjacent debug value instructions to the insert position.
   for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
          DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
     MachineInstr *DbgMI = *DBI;
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 6c5abc66fba1..d81c6f8a31e1 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -22,15 +22,15 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
@@ -42,6 +42,7 @@ using namespace llvm;
 #define DEBUG_TYPE "machine-trace-metrics"
 
 char MachineTraceMetrics::ID = 0;
+
 char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
 
 INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
@@ -395,7 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
 }
 
 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+  DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
+               << '\n');
   BlockInfo[MBB->getNumber()].invalidate();
   for (unsigned i = 0; i != TS_NumStrategies; ++i)
     if (Ensembles[i])
@@ -475,8 +477,8 @@ public:
 
 /// Compute the trace through MBB.
 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
-               << MBB->getNumber() << '\n');
+  DEBUG(dbgs() << "Computing " << getName() << " trace through "
+               << printMBBReference(*MBB) << '\n');
   // Set up loop bounds for the backwards post-order traversal.
   LoopBounds Bounds(BlockInfo, MTM.Loops);
 
@@ -484,13 +486,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
   Bounds.Downward = false;
   Bounds.Visited.clear();
   for (auto I : inverse_post_order_ext(MBB, Bounds)) {
-    DEBUG(dbgs() << "  pred for BB#" << I->getNumber() << ": ");
+    DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
     TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
     // All the predecessors have been visited, pick the preferred one.
     TBI.Pred = pickTracePred(I);
     DEBUG({
       if (TBI.Pred)
-        dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+        dbgs() << printMBBReference(*TBI.Pred) << '\n';
       else
         dbgs() << "null\n";
     });
@@ -502,13 +504,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
   Bounds.Downward = true;
   Bounds.Visited.clear();
   for (auto I : post_order_ext(MBB, Bounds)) {
-    DEBUG(dbgs() << "  succ for BB#" << I->getNumber() << ": ");
+    DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
     TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
     // All the successors have been visited, pick the preferred one.
     TBI.Succ = pickTraceSucc(I);
     DEBUG({
       if (TBI.Succ)
-        dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+        dbgs() << printMBBReference(*TBI.Succ) << '\n';
       else
         dbgs() << "null\n";
     });
@@ -529,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
     WorkList.push_back(BadMBB);
     do {
       const MachineBasicBlock *MBB = WorkList.pop_back_val();
-      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
-            << " height.\n");
+      DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+                   << getName() << " height.\n");
       // Find any MBB predecessors that have MBB as their preferred successor.
       // They are the only ones that need to be invalidated.
       for (const MachineBasicBlock *Pred : MBB->predecessors()) {
@@ -554,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
     WorkList.push_back(BadMBB);
     do {
       const MachineBasicBlock *MBB = WorkList.pop_back_val();
-      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
-            << " depth.\n");
+      DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
+                   << getName() << " depth.\n");
       // Find any MBB successors that have MBB as their preferred predecessor.
       // They are the only ones that need to be invalidated.
       for (const MachineBasicBlock *Succ : MBB->successors()) {
@@ -694,25 +696,6 @@ static void getPHIDeps(const MachineInstr &UseMI,
   }
 }
 
-// Keep track of physreg data dependencies by recording each live register unit.
-// Associate each regunit with an instruction operand. Depending on the
-// direction instructions are scanned, it could be the operand that defined the
-// regunit, or the highest operand to read the regunit.
-namespace {
-
-struct LiveRegUnit {
-  unsigned RegUnit;
-  unsigned Cycle = 0;
-  const MachineInstr *MI = nullptr;
-  unsigned Op = 0;
-
-  unsigned getSparseSetIndex() const { return RegUnit; }
-
-  LiveRegUnit(unsigned RU) : RegUnit(RU) {}
-};
-
-} // end anonymous namespace
-
 // Identify physreg dependencies for UseMI, and update the live regunit
 // tracking set when scanning instructions downwards.
 static void updatePhysDepsDownwards(const MachineInstr *UseMI,
@@ -797,6 +780,59 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
   return MaxLen;
 }
 
+void MachineTraceMetrics::Ensemble::
+updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
+            SparseSet<LiveRegUnit> &RegUnits) {
+  SmallVector<DataDep, 8> Deps;
+  // Collect all data dependencies.
+  if (UseMI.isPHI())
+    getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+  else if (getDataDeps(UseMI, Deps, MTM.MRI))
+    updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
+
+  // Filter and process dependencies, computing the earliest issue cycle.
+  unsigned Cycle = 0;
+  for (const DataDep &Dep : Deps) {
+    const TraceBlockInfo&DepTBI =
+      BlockInfo[Dep.DefMI->getParent()->getNumber()];
+    // Ignore dependencies from outside the current trace.
+    if (!DepTBI.isUsefulDominator(TBI))
+      continue;
+    assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+    unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+    // Add latency if DefMI is a real instruction. Transients get latency 0.
+    if (!Dep.DefMI->isTransient())
+      DepCycle += MTM.SchedModel
+        .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
+    Cycle = std::max(Cycle, DepCycle);
+  }
+  // Remember the instruction depth.
+  InstrCycles &MICycles = Cycles[&UseMI];
+  MICycles.Depth = Cycle;
+
+  if (TBI.HasValidInstrHeights) {
+    // Update critical path length.
+    TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+    DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+  } else {
+    DEBUG(dbgs() << Cycle << '\t' << UseMI);
+  }
+}
+
+void MachineTraceMetrics::Ensemble::
+updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
+            SparseSet<LiveRegUnit> &RegUnits) {
+  updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
+}
+
+void MachineTraceMetrics::Ensemble::
+updateDepths(MachineBasicBlock::iterator Start,
+             MachineBasicBlock::iterator End,
+             SparseSet<LiveRegUnit> &RegUnits) {
+    for (; Start != End; Start++)
+      updateDepth(Start->getParent(), *Start, RegUnits);
+}
+
 /// Compute instruction depths for all instructions above or in MBB in its
 /// trace. This assumes that the trace through MBB has already been computed.
 void MachineTraceMetrics::Ensemble::
@@ -822,10 +858,9 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
   RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
 
   // Go through trace blocks in top-down order, stopping after the center block.
-  SmallVector<DataDep, 8> Deps;
   while (!Stack.empty()) {
     MBB = Stack.pop_back_val();
-    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
+    DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
     TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
     TBI.HasValidInstrDepths = true;
     TBI.CriticalPath = 0;
@@ -848,40 +883,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
       TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
 
     for (const auto &UseMI : *MBB) {
-      // Collect all data dependencies.
-      Deps.clear();
-      if (UseMI.isPHI())
-        getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
-      else if (getDataDeps(UseMI, Deps, MTM.MRI))
-        updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
-
-      // Filter and process dependencies, computing the earliest issue cycle.
-      unsigned Cycle = 0;
-      for (const DataDep &Dep : Deps) {
-        const TraceBlockInfo&DepTBI =
-          BlockInfo[Dep.DefMI->getParent()->getNumber()];
-        // Ignore dependencies from outside the current trace.
-        if (!DepTBI.isUsefulDominator(TBI))
-          continue;
-        assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
-        unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
-        // Add latency if DefMI is a real instruction. Transients get latency 0.
-        if (!Dep.DefMI->isTransient())
-          DepCycle += MTM.SchedModel
-            .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
-        Cycle = std::max(Cycle, DepCycle);
-      }
-      // Remember the instruction depth.
-      InstrCycles &MICycles = Cycles[&UseMI];
-      MICycles.Depth = Cycle;
-
-      if (!TBI.HasValidInstrHeights) {
-        DEBUG(dbgs() << Cycle << '\t' << UseMI);
-        continue;
-      }
-      // Update critical path length.
-      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
-      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+      updateDepth(TBI, UseMI, RegUnits);
     }
   }
 }
@@ -945,7 +947,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
   return Height;
 }
 
-typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
 
 // Push the height of DefMI upwards if required to match UseMI.
 // Return true if this is the first time DefMI was seen.
@@ -1043,7 +1045,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
   SmallVector<DataDep, 8> Deps;
   for (;!Stack.empty(); Stack.pop_back()) {
     MBB = Stack.back();
-    DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+    DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
     TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
     TBI.HasValidInstrHeights = true;
     TBI.CriticalPath = 0;
@@ -1130,18 +1132,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
 
     // Update virtual live-in heights. They were added by addLiveIns() with a 0
     // height because the final height isn't known until now.
-    DEBUG(dbgs() << "BB#" << MBB->getNumber() <<  " Live-ins:");
+    DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
     for (LiveInReg &LIR : TBI.LiveIns) {
       const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
       LIR.Height = Heights.lookup(DefMI);
-      DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+      DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
     }
 
     // Transfer the live regunits to the live-in list.
     for (SparseSet<LiveRegUnit>::const_iterator
          RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
       TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
-      DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+      DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI)
                    << '@' << RI->Cycle);
     }
     DEBUG(dbgs() << '\n');
@@ -1288,7 +1290,7 @@ bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
   OS << getName() << " ensemble:\n";
   for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
-    OS << "  BB#" << i << '\t';
+    OS << "  %bb." << i << '\t';
     BlockInfo[i].print(OS);
     OS << '\n';
   }
@@ -1298,10 +1300,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
   if (hasValidDepth()) {
     OS << "depth=" << InstrDepth;
     if (Pred)
-      OS << " pred=BB#" << Pred->getNumber();
+      OS << " pred=" << printMBBReference(*Pred);
     else
       OS << " pred=null";
-    OS << " head=BB#" << Head;
+    OS << " head=%bb." << Head;
     if (HasValidInstrDepths)
       OS << " +instrs";
   } else
@@ -1310,10 +1312,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
   if (hasValidHeight()) {
     OS << "height=" << InstrHeight;
     if (Succ)
-      OS << " succ=BB#" << Succ->getNumber();
+      OS << " succ=" << printMBBReference(*Succ);
     else
       OS << " succ=null";
-    OS << " tail=BB#" << Tail;
+    OS << " tail=%bb." << Tail;
     if (HasValidInstrHeights)
       OS << " +instrs";
   } else
@@ -1325,18 +1327,18 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
 void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
   unsigned MBBNum = &TBI - &TE.BlockInfo[0];
 
-  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
-     << " --> BB#" << TBI.Tail << ':';
+  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
+     << " --> %bb." << TBI.Tail << ':';
   if (TBI.hasValidHeight() && TBI.hasValidDepth())
     OS << ' ' << getInstrCount() << " instrs.";
   if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
     OS << ' ' << TBI.CriticalPath << " cycles.";
 
   const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
-  OS << "\nBB#" << MBBNum;
+  OS << "\n%bb." << MBBNum;
   while (Block->hasValidDepth() && Block->Pred) {
     unsigned Num = Block->Pred->getNumber();
-    OS << " <- BB#" << Num;
+    OS << " <- " << printMBBReference(*Block->Pred);
     Block = &TE.BlockInfo[Num];
   }
 
@@ -1344,7 +1346,7 @@ void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
   OS << "\n    ";
   while (Block->hasValidHeight() && Block->Succ) {
     unsigned Num = Block->Succ->getNumber();
-    OS << " -> BB#" << Num;
+    OS << " -> " << printMBBReference(*Block->Succ);
     Block = &TE.BlockInfo[Num];
   }
   OS << '\n';
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index c50a95a06505..c9fe7681e280 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,4 +1,4 @@
-//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//===- MachineVerifier.cpp - Machine Code Verifier ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -23,41 +23,68 @@
 // the verifier errors.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+
 using namespace llvm;
 
 namespace {
-  struct MachineVerifier {
 
-    MachineVerifier(Pass *pass, const char *b) :
-      PASS(pass),
-      Banner(b)
-      {}
+  struct MachineVerifier {
+    MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
 
     unsigned verify(MachineFunction &MF);
 
@@ -75,11 +102,11 @@ namespace {
     bool isFunctionRegBankSelected;
     bool isFunctionSelected;
 
-    typedef SmallVector<unsigned, 16> RegVector;
-    typedef SmallVector<const uint32_t*, 4> RegMaskVector;
-    typedef DenseSet<unsigned> RegSet;
-    typedef DenseMap<unsigned, const MachineInstr*> RegMap;
-    typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
+    using RegVector = SmallVector<unsigned, 16>;
+    using RegMaskVector = SmallVector<const uint32_t *, 4>;
+    using RegSet = DenseSet<unsigned>;
+    using RegMap = DenseMap<unsigned, const MachineInstr *>;
+    using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
 
     const MachineInstr *FirstTerminator;
     BlockSet FunctionBlocks;
@@ -101,7 +128,7 @@ namespace {
 
     struct BBInfo {
       // Is this MBB reachable from the MF entry point?
-      bool reachable;
+      bool reachable = false;
 
       // Vregs that must be live in because they are used without being
       // defined. Map value is the user.
@@ -126,7 +153,7 @@ namespace {
       // Set versions of block's predecessor and successor lists.
       BlockSet Preds, Succs;
 
-      BBInfo() : reachable(false) {}
+      BBInfo() = default;
 
       // Add register to vregsPassed if it belongs there. Return true if
       // anything changed.
@@ -237,7 +264,7 @@ namespace {
 
     void markReachable(const MachineBasicBlock *MBB);
     void calcRegsPassed();
-    void checkPHIOps(const MachineBasicBlock *MBB);
+    void checkPHIOps(const MachineBasicBlock &MBB);
 
     void calcRegsRequired();
     void verifyLiveVariables();
@@ -259,6 +286,7 @@ namespace {
 
   struct MachineVerifierPass : public MachineFunctionPass {
     static char ID; // Pass ID, replacement for typeid
+
     const std::string Banner;
 
     MachineVerifierPass(std::string banner = std::string())
@@ -279,9 +307,10 @@ namespace {
     }
   };
 
-}
+} // end anonymous namespace
 
 char MachineVerifierPass::ID = 0;
+
 INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
                 "Verify generated machine code", false, false)
 
@@ -442,9 +471,8 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
 void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
   assert(MBB);
   report(msg, MBB->getParent());
-  errs() << "- basic block: BB#" << MBB->getNumber()
-      << ' ' << MBB->getName()
-      << " (" << (const void*)MBB << ')';
+  errs() << "- basic block: " << printMBBReference(*MBB) << ' '
+         << MBB->getName() << " (" << (const void *)MBB << ')';
   if (Indexes)
     errs() << " [" << Indexes->getMBBStartIdx(MBB)
         << ';' <<  Indexes->getMBBEndIdx(MBB) << ')';
@@ -499,14 +527,14 @@ void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
 }
 
 void MachineVerifier::report_context_vreg(unsigned VReg) const {
-  errs() << "- v. register: " << PrintReg(VReg, TRI) << '\n';
+  errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
 }
 
 void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
   if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
     report_context_vreg(VRegOrUnit);
   } else {
-    errs() << "- regunit:     " << PrintRegUnit(VRegOrUnit, TRI) << '\n';
+    errs() << "- regunit:     " << printRegUnit(VRegOrUnit, TRI) << '\n';
   }
 }
 
@@ -590,8 +618,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
       report("MBB has successor that isn't part of the function.", MBB);
     if (!MBBInfoMap[*I].Preds.count(MBB)) {
       report("Inconsistent CFG", MBB);
-      errs() << "MBB is not in the predecessor list of the successor BB#"
-          << (*I)->getNumber() << ".\n";
+      errs() << "MBB is not in the predecessor list of the successor "
+             << printMBBReference(*(*I)) << ".\n";
     }
   }
 
@@ -602,19 +630,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
       report("MBB has predecessor that isn't part of the function.", MBB);
     if (!MBBInfoMap[*I].Succs.count(MBB)) {
       report("Inconsistent CFG", MBB);
-      errs() << "MBB is not in the successor list of the predecessor BB#"
-          << (*I)->getNumber() << ".\n";
+      errs() << "MBB is not in the successor list of the predecessor "
+             << printMBBReference(*(*I)) << ".\n";
     }
   }
 
   const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
   const BasicBlock *BB = MBB->getBasicBlock();
-  const Function *Fn = MF->getFunction();
+  const Function &F = MF->getFunction();
   if (LandingPadSuccs.size() > 1 &&
       !(AsmInfo &&
         AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
         BB && isa<SwitchInst>(BB->getTerminator())) &&
-      !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn())))
+      !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
     report("MBB has more than one landing pad successor", MBB);
 
   // Call AnalyzeBranch. If it succeeds, there several more conditions to check.
@@ -926,6 +954,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
       report("Generic instruction accessing memory must have one mem operand",
              MI);
     break;
+  case TargetOpcode::G_PHI: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    if (!DstTy.isValid() ||
+        !std::all_of(MI->operands_begin() + 1, MI->operands_end(),
+                     [this, &DstTy](const MachineOperand &MO) {
+                       if (!MO.isReg())
+                         return true;
+                       LLT Ty = MRI->getType(MO.getReg());
+                       if (!Ty.isValid() || (Ty != DstTy))
+                         return false;
+                       return true;
+                     }))
+      report("Generic Instruction G_PHI has operands with incompatible/missing "
+             "types",
+             MI);
+    break;
+  }
   case TargetOpcode::STATEPOINT:
     if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
         !MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
@@ -1039,101 +1084,112 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       report("Two-address instruction operands must be identical", MO, MONum);
 
     // Check register classes.
-    if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
-      unsigned SubIdx = MO->getSubReg();
+    unsigned SubIdx = MO->getSubReg();
 
-      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-        if (SubIdx) {
-          report("Illegal subregister index for physical register", MO, MONum);
-          return;
-        }
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (SubIdx) {
+        report("Illegal subregister index for physical register", MO, MONum);
+        return;
+      }
+      if (MONum < MCID.getNumOperands()) {
         if (const TargetRegisterClass *DRC =
               TII->getRegClass(MCID, MONum, TRI, *MF)) {
           if (!DRC->contains(Reg)) {
             report("Illegal physical register for instruction", MO, MONum);
-            errs() << TRI->getName(Reg) << " is not a "
-                << TRI->getRegClassName(DRC) << " register.\n";
+            errs() << printReg(Reg, TRI) << " is not a "
+                   << TRI->getRegClassName(DRC) << " register.\n";
           }
         }
-      } else {
-        // Virtual register.
-        const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
-        if (!RC) {
-          // This is a generic virtual register.
-
-          // If we're post-Select, we can't have gvregs anymore.
-          if (isFunctionSelected) {
-            report("Generic virtual register invalid in a Selected function",
-                   MO, MONum);
-            return;
-          }
+      }
+      if (MO->isRenamable() &&
+          ((MO->isDef() && MI->hasExtraDefRegAllocReq()) ||
+           (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) {
+        report("Illegal isRenamable setting for opcode with extra regalloc "
+               "requirements",
+               MO, MONum);
+        return;
+      }
+    } else {
+      // Virtual register.
+      const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
+      if (!RC) {
+        // This is a generic virtual register.
+
+        // If we're post-Select, we can't have gvregs anymore.
+        if (isFunctionSelected) {
+          report("Generic virtual register invalid in a Selected function",
+                 MO, MONum);
+          return;
+        }
 
-          // The gvreg must have a type and it must not have a SubIdx.
-          LLT Ty = MRI->getType(Reg);
-          if (!Ty.isValid()) {
-            report("Generic virtual register must have a valid type", MO,
-                   MONum);
-            return;
-          }
+        // The gvreg must have a type and it must not have a SubIdx.
+        LLT Ty = MRI->getType(Reg);
+        if (!Ty.isValid()) {
+          report("Generic virtual register must have a valid type", MO,
+                 MONum);
+          return;
+        }
 
-          const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+        const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
 
-          // If we're post-RegBankSelect, the gvreg must have a bank.
-          if (!RegBank && isFunctionRegBankSelected) {
-            report("Generic virtual register must have a bank in a "
-                   "RegBankSelected function",
-                   MO, MONum);
-            return;
-          }
+        // If we're post-RegBankSelect, the gvreg must have a bank.
+        if (!RegBank && isFunctionRegBankSelected) {
+          report("Generic virtual register must have a bank in a "
+                 "RegBankSelected function",
+                 MO, MONum);
+          return;
+        }
 
-          // Make sure the register fits into its register bank if any.
-          if (RegBank && Ty.isValid() &&
-              RegBank->getSize() < Ty.getSizeInBits()) {
-            report("Register bank is too small for virtual register", MO,
-                   MONum);
-            errs() << "Register bank " << RegBank->getName() << " too small("
-                   << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
-                   << "-bits\n";
-            return;
-          }
-          if (SubIdx)  {
-            report("Generic virtual register does not subregister index", MO,
-                   MONum);
-            return;
-          }
+        // Make sure the register fits into its register bank if any.
+        if (RegBank && Ty.isValid() &&
+            RegBank->getSize() < Ty.getSizeInBits()) {
+          report("Register bank is too small for virtual register", MO,
+                 MONum);
+          errs() << "Register bank " << RegBank->getName() << " too small("
+                 << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
+                 << "-bits\n";
+          return;
+        }
+        if (SubIdx)  {
+          report("Generic virtual register does not subregister index", MO,
+                 MONum);
+          return;
+        }
 
-          // If this is a target specific instruction and this operand
-          // has register class constraint, the virtual register must
-          // comply to it.
-          if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
-              TII->getRegClass(MCID, MONum, TRI, *MF)) {
-            report("Virtual register does not match instruction constraint", MO,
-                   MONum);
-            errs() << "Expect register class "
-                   << TRI->getRegClassName(
-                          TII->getRegClass(MCID, MONum, TRI, *MF))
-                   << " but got nothing\n";
-            return;
-          }
+        // If this is a target specific instruction and this operand
+        // has register class constraint, the virtual register must
+        // comply to it.
+        if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
+            MONum < MCID.getNumOperands() &&
+            TII->getRegClass(MCID, MONum, TRI, *MF)) {
+          report("Virtual register does not match instruction constraint", MO,
+                 MONum);
+          errs() << "Expect register class "
+                 << TRI->getRegClassName(
+                        TII->getRegClass(MCID, MONum, TRI, *MF))
+                 << " but got nothing\n";
+          return;
+        }
 
-          break;
+        break;
+      }
+      if (SubIdx) {
+        const TargetRegisterClass *SRC =
+          TRI->getSubClassWithSubReg(RC, SubIdx);
+        if (!SRC) {
+          report("Invalid subregister index for virtual register", MO, MONum);
+          errs() << "Register class " << TRI->getRegClassName(RC)
+              << " does not support subreg index " << SubIdx << "\n";
+          return;
         }
-        if (SubIdx) {
-          const TargetRegisterClass *SRC =
-            TRI->getSubClassWithSubReg(RC, SubIdx);
-          if (!SRC) {
-            report("Invalid subregister index for virtual register", MO, MONum);
-            errs() << "Register class " << TRI->getRegClassName(RC)
-                << " does not support subreg index " << SubIdx << "\n";
-            return;
-          }
-          if (RC != SRC) {
-            report("Invalid register class for subregister index", MO, MONum);
-            errs() << "Register class " << TRI->getRegClassName(RC)
-                << " does not fully support subreg index " << SubIdx << "\n";
-            return;
-          }
+        if (RC != SRC) {
+          report("Invalid register class for subregister index", MO, MONum);
+          errs() << "Register class " << TRI->getRegClassName(RC)
+              << " does not fully support subreg index " << SubIdx << "\n";
+          return;
         }
+      }
+      if (MONum < MCID.getNumOperands()) {
         if (const TargetRegisterClass *DRC =
               TII->getRegClass(MCID, MONum, TRI, *MF)) {
           if (SubIdx) {
@@ -1449,8 +1505,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
   }
 }
 
-void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
-}
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {}
 
 // This function gets called after visiting all instructions in a bundle. The
 // argument points to the bundle header.
@@ -1559,32 +1614,66 @@ void MachineVerifier::calcRegsRequired() {
 
 // Check PHI instructions at the beginning of MBB. It is assumed that
 // calcRegsPassed has been run so BBInfo::isLiveOut is valid.
-void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
+  BBInfo &MInfo = MBBInfoMap[&MBB];
+
   SmallPtrSet<const MachineBasicBlock*, 8> seen;
-  for (const auto &BBI : *MBB) {
-    if (!BBI.isPHI())
+  for (const MachineInstr &Phi : MBB) {
+    if (!Phi.isPHI())
       break;
     seen.clear();
 
-    for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
-      unsigned Reg = BBI.getOperand(i).getReg();
-      const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB();
-      if (!Pre->isSuccessor(MBB))
+    const MachineOperand &MODef = Phi.getOperand(0);
+    if (!MODef.isReg() || !MODef.isDef()) {
+      report("Expected first PHI operand to be a register def", &MODef, 0);
+      continue;
+    }
+    if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() ||
+        MODef.isEarlyClobber() || MODef.isDebug())
+      report("Unexpected flag on PHI operand", &MODef, 0);
+    unsigned DefReg = MODef.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+      report("Expected first PHI operand to be a virtual register", &MODef, 0);
+
+    for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
+      const MachineOperand &MO0 = Phi.getOperand(I);
+      if (!MO0.isReg()) {
+        report("Expected PHI operand to be a register", &MO0, I);
+        continue;
+      }
+      if (MO0.isImplicit() || MO0.isInternalRead() || MO0.isEarlyClobber() ||
+          MO0.isDebug() || MO0.isTied())
+        report("Unexpected flag on PHI operand", &MO0, I);
+
+      const MachineOperand &MO1 = Phi.getOperand(I + 1);
+      if (!MO1.isMBB()) {
+        report("Expected PHI operand to be a basic block", &MO1, I + 1);
+        continue;
+      }
+
+      const MachineBasicBlock &Pre = *MO1.getMBB();
+      if (!Pre.isSuccessor(&MBB)) {
+        report("PHI input is not a predecessor block", &MO1, I + 1);
         continue;
-      seen.insert(Pre);
-      BBInfo &PrInfo = MBBInfoMap[Pre];
-      if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
-        report("PHI operand is not live-out from predecessor",
-               &BBI.getOperand(i), i);
+      }
+
+      if (MInfo.reachable) {
+        seen.insert(&Pre);
+        BBInfo &PrInfo = MBBInfoMap[&Pre];
+        if (!MO0.isUndef() && PrInfo.reachable &&
+            !PrInfo.isLiveOut(MO0.getReg()))
+          report("PHI operand is not live-out from predecessor", &MO0, I);
+      }
     }
 
     // Did we see all predecessors?
-    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
-           PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
-      if (!seen.count(*PrI)) {
-        report("Missing PHI operand", &BBI);
-        errs() << "BB#" << (*PrI)->getNumber()
-            << " is a predecessor according to the CFG.\n";
+    if (MInfo.reachable) {
+      for (MachineBasicBlock *Pred : MBB.predecessors()) {
+        if (!seen.count(Pred)) {
+          report("Missing PHI operand", &Phi);
+          errs() << printMBBReference(*Pred)
+                 << " is a predecessor according to the CFG.\n";
+        }
       }
     }
   }
@@ -1593,15 +1682,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
 void MachineVerifier::visitMachineFunctionAfter() {
   calcRegsPassed();
 
-  for (const auto &MBB : *MF) {
-    BBInfo &MInfo = MBBInfoMap[&MBB];
-
-    // Skip unreachable MBBs.
-    if (!MInfo.reachable)
-      continue;
-
-    checkPHIOps(&MBB);
-  }
+  for (const MachineBasicBlock &MBB : *MF)
+    checkPHIOps(MBB);
 
   // Now check liveness info if available
   calcRegsRequired();
@@ -1614,8 +1696,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
          ++I)
       if (MInfo.regsKilled.count(*I)) {
         report("Virtual register killed in block, but needed live out.", &MBB);
-        errs() << "Virtual register " << PrintReg(*I)
-            << " is used after the block.\n";
+        errs() << "Virtual register " << printReg(*I)
+               << " is used after the block.\n";
       }
   }
 
@@ -1647,14 +1729,14 @@ void MachineVerifier::verifyLiveVariables() {
       if (MInfo.vregsRequired.count(Reg)) {
         if (!VI.AliveBlocks.test(MBB.getNumber())) {
           report("LiveVariables: Block missing from AliveBlocks", &MBB);
-          errs() << "Virtual register " << PrintReg(Reg)
-              << " must be live through the block.\n";
+          errs() << "Virtual register " << printReg(Reg)
+                 << " must be live through the block.\n";
         }
       } else {
         if (VI.AliveBlocks.test(MBB.getNumber())) {
           report("LiveVariables: Block should not be in AliveBlocks", &MBB);
-          errs() << "Virtual register " << PrintReg(Reg)
-              << " is not needed live through the block.\n";
+          errs() << "Virtual register " << printReg(Reg)
+                 << " is not needed live through the block.\n";
         }
       }
     }
@@ -1672,7 +1754,7 @@ void MachineVerifier::verifyLiveIntervals() {
 
     if (!LiveInts->hasInterval(Reg)) {
       report("Missing live interval for virtual register", MF);
-      errs() << PrintReg(Reg, TRI) << " still has defs or uses\n";
+      errs() << printReg(Reg, TRI) << " still has defs or uses\n";
       continue;
     }
 
@@ -1887,7 +1969,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
       if (MOI->isDef()) {
         if (Sub != 0) {
           hasSubRegDef = true;
-          // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane
+          // An operand %0:sub0 reads %0:sub1..n. Invert the lane
           // mask for subregister defs. Read-undef defs will be handled by
           // readsReg below.
           SLM = ~SLM;
@@ -1935,7 +2017,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
     // Skip this block.
     ++MFI;
   }
-  for (;;) {
+  while (true) {
     assert(LiveInts->isLiveInToMBB(LR, &*MFI));
     // We don't know how to track physregs into a landing pad.
     if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
@@ -1964,8 +2046,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
         report("Register not marked live out of predecessor", *PI);
         report_context(LR, Reg, LaneMask);
         report_context(*VNI);
-        errs() << " live into BB#" << MFI->getNumber()
-               << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
+        errs() << " live into " << printMBBReference(*MFI) << '@'
+               << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
                << PEnd << '\n';
         continue;
       }
@@ -1974,9 +2056,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
       if (!IsPHI && PVNI != VNI) {
         report("Different value live out of predecessor", *PI);
         report_context(LR, Reg, LaneMask);
-        errs() << "Valno #" << PVNI->id << " live out of BB#"
-               << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id
-               << " live into BB#" << MFI->getNumber() << '@'
+        errs() << "Valno #" << PVNI->id << " live out of "
+               << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #"
+               << VNI->id << " live into " << printMBBReference(*MFI) << '@'
                << LiveInts->getMBBStartIdx(&*MFI) << '\n';
       }
     }
@@ -2041,23 +2123,25 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
 }
 
 namespace {
+
   // FrameSetup and FrameDestroy can have zero adjustment, so using a single
   // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the
   // value is zero.
   // We use a bool plus an integer to capture the stack state.
   struct StackStateOfBB {
-    StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false),
-      ExitIsSetup(false) { }
+    StackStateOfBB() = default;
     StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) :
       EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup),
-      ExitIsSetup(ExitSetup) { }
+      ExitIsSetup(ExitSetup) {}
+
     // Can be negative, which means we are setting up a frame.
-    int EntryValue;
-    int ExitValue;
-    bool EntryIsSetup;
-    bool ExitIsSetup;
+    int EntryValue = 0;
+    int ExitValue = 0;
+    bool EntryIsSetup = false;
+    bool ExitIsSetup = false;
   };
-}
+
+} // end anonymous namespace
 
 /// Make sure on every path through the CFG, a FrameSetup <n> is always followed
 /// by a FrameDestroy <n>, stack adjustments are identical on all
@@ -2073,8 +2157,8 @@ void MachineVerifier::verifyStackFrame() {
   df_iterator_default_set<const MachineBasicBlock*> Reachable;
 
   // Visit the MBBs in DFS order.
-  for (df_ext_iterator<const MachineFunction*,
-                       df_iterator_default_set<const MachineBasicBlock*> >
+  for (df_ext_iterator<const MachineFunction *,
+                       df_iterator_default_set<const MachineBasicBlock *>>
        DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
        DFI != DFE; ++DFI) {
     const MachineBasicBlock *MBB = *DFI;
@@ -2125,11 +2209,11 @@ void MachineVerifier::verifyStackFrame() {
           (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
            SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
         report("The exit stack state of a predecessor is inconsistent.", MBB);
-        errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
-            << SPState[(*I)->getNumber()].ExitValue << ", "
-            << SPState[(*I)->getNumber()].ExitIsSetup
-            << "), while BB#" << MBB->getNumber() << " has entry state ("
-            << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
+        errs() << "Predecessor " << printMBBReference(*(*I))
+               << " has exit state (" << SPState[(*I)->getNumber()].ExitValue
+               << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while "
+               << printMBBReference(*MBB) << " has entry state ("
+               << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
       }
     }
 
@@ -2141,11 +2225,11 @@ void MachineVerifier::verifyStackFrame() {
           (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
            SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
         report("The entry stack state of a successor is inconsistent.", MBB);
-        errs() << "Successor BB#" << (*I)->getNumber() << " has entry state ("
-            << SPState[(*I)->getNumber()].EntryValue << ", "
-            << SPState[(*I)->getNumber()].EntryIsSetup
-            << "), while BB#" << MBB->getNumber() << " has exit state ("
-            << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
+        errs() << "Successor " << printMBBReference(*(*I))
+               << " has entry state (" << SPState[(*I)->getNumber()].EntryValue
+               << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while "
+               << printMBBReference(*MBB) << " has exit state ("
+               << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
       }
     }
 
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 633a853b2c74..e7f426c469a0 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -19,10 +19,10 @@
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 
 #define DEBUG_TYPE "machine-scheduler"
 
@@ -33,42 +33,74 @@ using namespace llvm;
 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
 
-static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+static bool isHazard(const SDep &Dep) {
+  return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
+}
+
+static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
                                 SUnit &SecondSU) {
+  // Check that neither instr is already paired with another along the edge
+  // between them.
+  for (SDep &SI : FirstSU.Succs)
+    if (SI.isCluster())
+      return false;
+
+  for (SDep &SI : SecondSU.Preds)
+    if (SI.isCluster())
+      return false;
+  // Though the reachability checks above could be made more generic,
+  // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid,
+  // the extra computation cost makes it less interesting in general cases.
+
   // Create a single weak edge between the adjacent instrs. The only effect is
   // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
-  DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+  if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
+    return false;
 
-  // Adjust the latency between the anchor instr and its
-  // predecessors.
-  for (SDep &IDep : SecondSU.Preds)
-    if (IDep.getSUnit() == &FirstSU)
-      IDep.setLatency(0);
+  // Adjust the latency between both instrs.
+  for (SDep &SI : FirstSU.Succs)
+    if (SI.getSUnit() == &SecondSU)
+      SI.setLatency(0);
 
-  // Adjust the latency between the dependent instr and its
-  // predecessors.
-  for (SDep &IDep : FirstSU.Succs)
-    if (IDep.getSUnit() == &SecondSU)
-      IDep.setLatency(0);
+  for (SDep &SI : SecondSU.Preds)
+    if (SI.getSUnit() == &FirstSU)
+      SI.setLatency(0);
 
-  DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse ";
+  DEBUG(dbgs() << "Macro fuse: ";
         FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
         SecondSU.print(dbgs(), &DAG); dbgs() << " /  ";
         dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
                   DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
 
+  // Make data dependencies from the FirstSU also dependent on the SecondSU to
+  // prevent them from being scheduled between the FirstSU and the SecondSU.
   if (&SecondSU != &DAG.ExitSU)
-    // Make instructions dependent on FirstSU also dependent on SecondSU to
-    // prevent them from being scheduled between FirstSU and and SecondSU.
     for (const SDep &SI : FirstSU.Succs) {
-      if (SI.getSUnit() == &SecondSU)
+      SUnit *SU = SI.getSUnit();
+      if (SI.isWeak() || isHazard(SI) ||
+          SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))
+        continue;
+      DEBUG(dbgs() << "  Bind ";
+            SecondSU.print(dbgs(), &DAG); dbgs() << " - ";
+            SU->print(dbgs(), &DAG); dbgs() << '\n';);
+      DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));
+    }
+
+  // Make the FirstSU also dependent on the dependencies of the SecondSU to
+  // prevent them from being scheduled between the FirstSU and the SecondSU.
+  if (&FirstSU != &DAG.EntrySU)
+    for (const SDep &SI : SecondSU.Preds) {
+      SUnit *SU = SI.getSUnit();
+      if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))
         continue;
-      DEBUG(dbgs() << "  Copy Succ ";
-            SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';);
-      DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial));
+      DEBUG(dbgs() << "  Bind ";
+            SU->print(dbgs(), &DAG); dbgs() << " - ";
+            FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);
+      DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));
     }
 
   ++NumFused;
+  return true;
 }
 
 namespace {
@@ -116,9 +148,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
 
   // Explorer for fusion candidates among the dependencies of the anchor instr.
   for (SDep &Dep : AnchorSU.Preds) {
-    // Ignore dependencies that don't enforce ordering.
-    if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output ||
-        Dep.isWeak())
+    // Ignore dependencies other than data or strong ordering.
+    if (Dep.isWeak() || isHazard(Dep))
       continue;
 
     SUnit &DepSU = *Dep.getSUnit();
@@ -129,8 +160,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
     if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
       continue;
 
-    fuseInstructionPair(DAG, DepSU, AnchorSU);
-    return true;
+    if (fuseInstructionPair(DAG, DepSU, AnchorSU))
+      return true;
   }
 
   return false;
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index f7aeb4204c5b..8972867ba083 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -1,4 +1,4 @@
-//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,13 +14,17 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include <cassert>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "opt-phis"
@@ -29,12 +33,14 @@ STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
 STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
 
 namespace {
+
   class OptimizePHIs : public MachineFunctionPass {
     MachineRegisterInfo *MRI;
     const TargetInstrInfo *TII;
 
   public:
     static char ID; // Pass identification
+
     OptimizePHIs() : MachineFunctionPass(ID) {
       initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
     }
@@ -47,23 +53,26 @@ namespace {
     }
 
   private:
-    typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
-    typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+    using InstrSet = SmallPtrSet<MachineInstr *, 16>;
+    using InstrSetIterator = SmallPtrSetIterator<MachineInstr *>;
 
     bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
                                InstrSet &PHIsInCycle);
     bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
     bool OptimizeBB(MachineBasicBlock &MBB);
   };
-}
+
+} // end anonymous namespace
 
 char OptimizePHIs::ID = 0;
+
 char &llvm::OptimizePHIsID = OptimizePHIs::ID;
+
 INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE,
                 "Optimize machine instruction PHIs", false, false)
 
 bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
-  if (skipFunction(*Fn.getFunction()))
+  if (skipFunction(Fn.getFunction()))
     return false;
 
   MRI = &Fn.getRegInfo();
@@ -144,7 +153,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
   if (PHIsInCycle.size() == 16)
     return false;
 
-  for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) {
+  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DstReg)) {
     if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle))
       return false;
   }
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 9c898fa40d7e..54c5a940275d 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -1,4 +1,4 @@
-//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,24 +14,35 @@
 //===----------------------------------------------------------------------===//
 
 #include "PHIEliminationUtils.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "phi-node-elimination"
@@ -51,6 +62,7 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
     cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
 
 namespace {
+
   class PHIElimination : public MachineFunctionPass {
     MachineRegisterInfo *MRI; // Machine register information
     LiveVariables *LV;
@@ -58,6 +70,7 @@ namespace {
 
   public:
     static char ID; // Pass identification, replacement for typeid
+
     PHIElimination() : MachineFunctionPass(ID) {
       initializePHIEliminationPass(*PassRegistry::getPassRegistry());
     }
@@ -68,8 +81,8 @@ namespace {
   private:
     /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
     /// in predecessor basic blocks.
-    ///
     bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+
     void LowerPHINode(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator LastPHIIt);
 
@@ -78,7 +91,6 @@ namespace {
     /// register which is used in a PHI node. We map that to the BB the
     /// vreg is coming from. This is used later to determine when the vreg
     /// is killed in the BB.
-    ///
     void analyzePHINodes(const MachineFunction& Fn);
 
     /// Split critical edges where necessary for good coalescer performance.
@@ -90,8 +102,8 @@ namespace {
     bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB);
     bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB);
 
-    typedef std::pair<unsigned, unsigned> BBVRegPair;
-    typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+    using BBVRegPair = std::pair<unsigned, unsigned>;
+    using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
 
     VRegPHIUse VRegPHIUseCount;
 
@@ -99,17 +111,19 @@ namespace {
     SmallPtrSet<MachineInstr*, 4> ImpDefs;
 
     // Map reusable lowered PHI node -> incoming join register.
-    typedef DenseMap<MachineInstr*, unsigned,
-                     MachineInstrExpressionTrait> LoweredPHIMap;
+    using LoweredPHIMap =
+        DenseMap<MachineInstr*, unsigned, MachineInstrExpressionTrait>;
     LoweredPHIMap LoweredPHIs;
   };
-}
+
+} // end anonymous namespace
 
 STATISTIC(NumLowered, "Number of phis lowered");
 STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
 STATISTIC(NumReused, "Number of reused lowered phis");
 
 char PHIElimination::ID = 0;
+
 char& llvm::PHIEliminationID = PHIElimination::ID;
 
 INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE,
@@ -182,7 +196,6 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
 
 /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
 /// predecessor basic blocks.
-///
 bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
                                              MachineBasicBlock &MBB) {
   if (MBB.empty() || !MBB.front().isPHI())
@@ -219,9 +232,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
   return true;
 }
 
-
-/// LowerPHINode - Lower the PHI node at the top of the specified block,
-///
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
 void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator LastPHIIt) {
   ++NumLowered;
@@ -259,7 +270,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
       IncomingReg = entry;
       reusedIncoming = true;
       ++NumReused;
-      DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+      DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " << *MPhi);
     } else {
       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
       entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -534,7 +545,6 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
 /// particular, we want to map the number of uses of a virtual register which is
 /// used in a PHI node. We map that to the BB the vreg is coming from. This is
 /// used later to determine when the vreg is killed in the BB.
-///
 void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
   for (const auto &MBB : MF)
     for (const auto &BBI : MBB) {
@@ -583,9 +593,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
       if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
         continue;
       if (ShouldSplit) {
-        DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
-                     << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
-                     << ": " << *BBI);
+        DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge "
+                     << printMBBReference(*PreMBB) << " -> "
+                     << printMBBReference(MBB) << ": " << *BBI);
       }
 
       // If Reg is not live-in to MBB, it means it must be live-in to some
diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp
index 50dd44fa659f..ff8680a0540d 100644
--- a/lib/CodeGen/ParallelCG.cpp
+++ b/lib/CodeGen/ParallelCG.cpp
@@ -19,7 +19,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index 513e82716564..0957705b19bb 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -16,9 +16,9 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -54,11 +54,11 @@ static bool doesNotGeneratecode(const MachineInstr &MI) {
 }
 
 bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
-  if (!MF.getFunction()->hasFnAttribute("patchable-function"))
+  if (!MF.getFunction().hasFnAttribute("patchable-function"))
     return false;
 
 #ifndef NDEBUG
-  Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function");
+  Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
   StringRef PatchType = PatchAttr.getValueAsString();
   assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
 #endif
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index b13f6b68c420..45078081987a 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -67,6 +67,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -74,20 +75,23 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 #include <cstdint>
 #include <memory>
@@ -170,11 +174,11 @@ namespace {
     }
 
     /// \brief Track Def -> Use info used for rewriting copies.
-    typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
-        RewriteMapTy;
+    using RewriteMapTy =
+        SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>;
 
     /// \brief Sequence of instructions that formulate recurrence cycle.
-    typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+    using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
 
   private:
     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
@@ -195,6 +199,7 @@ namespace {
     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+
     /// \brief Finds recurrence cycles, but only ones that formulated around
     /// a def operand and a use operand that are tied. If there is a use
     /// operand commutable with the tied use operand, find recurrence cycle
@@ -254,7 +259,7 @@ namespace {
   /// maintained with CommutePair.
   class RecurrenceInstr {
   public:
-    typedef std::pair<unsigned, unsigned> IndexPair;
+    using IndexPair = std::pair<unsigned, unsigned>;
 
     RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
     RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
@@ -277,11 +282,12 @@ namespace {
     SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
 
     /// Instruction using the sources in 'RegSrcs'.
-    const MachineInstr *Inst;
+    const MachineInstr *Inst = nullptr;
 
   public:
-    ValueTrackerResult() : Inst(nullptr) {}
-    ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) {
+    ValueTrackerResult() = default;
+
+    ValueTrackerResult(unsigned Reg, unsigned SubReg) {
       addSource(Reg, SubReg);
     }
 
@@ -350,13 +356,17 @@ namespace {
   class ValueTracker {
   private:
     /// The current point into the use-def chain.
-    const MachineInstr *Def;
+    const MachineInstr *Def = nullptr;
+
     /// The index of the definition in Def.
-    unsigned DefIdx;
+    unsigned DefIdx = 0;
+
     /// The sub register index of the definition.
     unsigned DefSubReg;
+
     /// The register where the value can be found.
     unsigned Reg;
+
     /// Specifiy whether or not the value tracking looks through
     /// complex instructions. When this is false, the value tracker
     /// bails on everything that is not a copy or a bitcast.
@@ -365,8 +375,10 @@ namespace {
     /// the ValueTracker class but that would have complicated the code of
     /// the users of this class.
     bool UseAdvancedTracking;
+
     /// MachineRegisterInfo used to perform tracking.
     const MachineRegisterInfo &MRI;
+
     /// Optional TargetInstrInfo used to perform some complex
     /// tracking.
     const TargetInstrInfo *TII;
@@ -374,22 +386,29 @@ namespace {
     /// \brief Dispatcher to the right underlying implementation of
     /// getNextSource.
     ValueTrackerResult getNextSourceImpl();
+
     /// \brief Specialized version of getNextSource for Copy instructions.
     ValueTrackerResult getNextSourceFromCopy();
+
     /// \brief Specialized version of getNextSource for Bitcast instructions.
     ValueTrackerResult getNextSourceFromBitcast();
+
     /// \brief Specialized version of getNextSource for RegSequence
     /// instructions.
     ValueTrackerResult getNextSourceFromRegSequence();
+
     /// \brief Specialized version of getNextSource for InsertSubreg
     /// instructions.
     ValueTrackerResult getNextSourceFromInsertSubreg();
+
     /// \brief Specialized version of getNextSource for ExtractSubreg
     /// instructions.
     ValueTrackerResult getNextSourceFromExtractSubreg();
+
     /// \brief Specialized version of getNextSource for SubregToReg
     /// instructions.
     ValueTrackerResult getNextSourceFromSubregToReg();
+
     /// \brief Specialized version of getNextSource for PHI instructions.
     ValueTrackerResult getNextSourceFromPHI();
 
@@ -410,7 +429,7 @@ namespace {
                  const MachineRegisterInfo &MRI,
                  bool UseAdvancedTracking = false,
                  const TargetInstrInfo *TII = nullptr)
-        : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
+        : DefSubReg(DefSubReg), Reg(Reg),
           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
       if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
         Def = MRI.getVRegDef(Reg);
@@ -453,6 +472,7 @@ namespace {
 } // end anonymous namespace
 
 char PeepholeOptimizer::ID = 0;
+
 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
 
 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
@@ -659,7 +679,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
 }
 
 /// \brief Check if a simpler conditional branch can be
-// generated
+/// generated
 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
   return TII->optimizeCondBranch(*MI);
 }
@@ -805,13 +825,13 @@ class CopyRewriter {
 protected:
   /// The copy-like instruction.
   MachineInstr &CopyLike;
+
   /// The index of the source being rewritten.
-  unsigned CurrentSrcIdx;
+  unsigned CurrentSrcIdx = 0;
 
 public:
-  CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
-
-  virtual ~CopyRewriter() {}
+  CopyRewriter(MachineInstr &MI) : CopyLike(MI) {}
+  virtual ~CopyRewriter() = default;
 
   /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
   /// the related value that it affects (TrackReg, TrackSubReg).
@@ -944,6 +964,7 @@ class UncoalescableRewriter : public CopyRewriter {
 protected:
   const TargetInstrInfo &TII;
   MachineRegisterInfo   &MRI;
+
   /// The number of defs in the bitcast
   unsigned NumDefs;
 
@@ -958,7 +979,6 @@ public:
   /// All such sources need to be considered rewritable in order to
   /// rewrite a uncoalescable copy-like instruction. This method return
   /// each definition that must be checked if rewritable.
-  ///
   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
                                unsigned &TrackReg,
                                unsigned &TrackSubReg) override {
@@ -1205,7 +1225,7 @@ public:
   }
 };
 
-}  // end anonymous namespace
+} // end anonymous namespace
 
 /// \brief Get the appropriated CopyRewriter for \p MI.
 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
@@ -1433,10 +1453,10 @@ bool PeepholeOptimizer::foldImmediate(
 // only the first copy is considered.
 //
 // e.g.
-// %vreg1 = COPY %vreg0
-// %vreg2 = COPY %vreg0:sub1
+// %1 = COPY %0
+// %2 = COPY %0:sub1
 //
-// Should replace %vreg2 uses with %vreg1:sub1
+// Should replace %2 uses with %1:sub1
 bool PeepholeOptimizer::foldRedundantCopy(
     MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs,
     DenseMap<unsigned, MachineInstr *> &CopyMIs) {
@@ -1496,7 +1516,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
   unsigned DstReg = MI->getOperand(0).getReg();
   unsigned SrcReg = MI->getOperand(1).getReg();
   if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
-    // %vreg = COPY %PHYSREG
+    // %vreg = COPY %physreg
     // Avoid using a datastructure which can track multiple live non-allocatable
     // phys->virt copies since LLVM doesn't seem to do this.
     NAPhysToVirtMIs.insert({SrcReg, MI});
@@ -1506,7 +1526,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
   if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
     return false;
 
-  // %PHYSREG = COPY %vreg
+  // %physreg = COPY %vreg
   auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
   if (PrevCopy == NAPhysToVirtMIs.end()) {
     // We can't remove the copy: there was an intervening clobber of the
@@ -1601,16 +1621,16 @@ bool PeepholeOptimizer::findTargetRecurrence(
 /// from the phi. For example, if there is a recurrence of
 ///
 /// LoopHeader:
-///   %vreg1 = phi(%vreg0, %vreg100)
+///   %1 = phi(%0, %100)
 /// LoopLatch:
-///   %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+///   %0<def, tied1> = ADD %2<def, tied0>, %1
 ///
-/// , the fact that vreg0 and vreg2 are in the same tied operands set makes
+/// , the fact that %0 and %2 are in the same tied operands set makes
 /// the coalescing of copy instruction generated from the phi in
-/// LoopHeader(i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
-/// %vreg2 have overlapping live range. This introduces additional move
-/// instruction to the final assembly. However, if we commute %vreg2 and
-/// %vreg1 of ADD instruction, the redundant move instruction can be
+/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
+/// %2 have overlapping live range. This introduces additional move
+/// instruction to the final assembly. However, if we commute %2 and
+/// %1 of ADD instruction, the redundant move instruction can be
 /// avoided.
 bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
   SmallSet<unsigned, 2> TargetRegs;
@@ -1642,7 +1662,7 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
 }
 
 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
@@ -1676,8 +1696,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
     // Track when a non-allocatable physical register is copied to a virtual
     // register so that useless moves can be removed.
     //
-    // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
-    // without any intervening re-definition of %PHYSREG.
+    // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
+    // without any intervening re-definition of %physreg.
     DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
 
     // Set of virtual registers that are copied from.
@@ -1847,7 +1867,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
         DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
         FoldAsLoadDefCandidates.clear();
       }
-
     }
   }
 
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index 4a50d895340a..f9d4a9746e41 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -31,11 +31,11 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "post-RA-hazard-rec"
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f2249f9e37e0..5d86faafdd85 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -34,15 +33,15 @@
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "post-RA-sched"
@@ -280,7 +279,7 @@ bool PostRAScheduler::enablePostRAScheduler(
 }
 
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
-  if (skipFunction(*Fn.getFunction()))
+  if (skipFunction(Fn.getFunction()))
     return false;
 
   TII = Fn.getSubtarget().getInstrInfo();
@@ -322,8 +321,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       static int bbcnt = 0;
       if (bbcnt++ % DebugDiv != DebugMod)
         continue;
-      dbgs() << "*** DEBUG scheduling " << Fn.getName()
-             << ":BB#" << MBB.getNumber() << " ***\n";
+      dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
+             << printMBBReference(MBB) << " ***\n";
     }
 #endif
 
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index fbc2bc64f425..8f88ef78828a 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -1,4 +1,4 @@
-//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
+//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,15 +16,15 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 
 using namespace llvm;
 
-namespace {
-
-bool lowerLoadRelative(Function &F) {
+static bool lowerLoadRelative(Function &F) {
   if (F.use_empty())
     return false;
 
@@ -55,7 +55,7 @@ bool lowerLoadRelative(Function &F) {
   return Changed;
 }
 
-bool lowerIntrinsics(Module &M) {
+static bool lowerIntrinsics(Module &M) {
   bool Changed = false;
   for (Function &F : M) {
     if (F.getName().startswith("llvm.load.relative."))
@@ -64,23 +64,26 @@ bool lowerIntrinsics(Module &M) {
   return Changed;
 }
 
+namespace {
+
 class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
 public:
   static char ID;
+
   PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
 
-  bool runOnModule(Module &M) { return lowerIntrinsics(M); }
+  bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
 };
 
+} // end anonymous namespace
+
 char PreISelIntrinsicLoweringLegacyPass::ID;
-}
 
 INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass,
                 "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering",
                 false, false)
 
-namespace llvm {
-ModulePass *createPreISelIntrinsicLoweringPass() {
+ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
   return new PreISelIntrinsicLoweringLegacyPass;
 }
 
@@ -91,4 +94,3 @@ PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
   else
     return PreservedAnalyses::none();
 }
-} // End llvm namespace
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 0118580a626a..48b48c5f6499 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -13,10 +13,10 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -154,7 +154,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
     if (WorkList.empty())
       continue;
 
-    DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+    DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size()
                  << " implicit defs.\n");
     Changed = true;
 
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e9f8d43fe643..a8d8ad8ac7dc 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1,4 +1,4 @@
-//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,77 +16,83 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "prologepilog"
 
-typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
-static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
-                                   unsigned &MinCSFrameIndex,
-                                   unsigned &MaxCXFrameIndex,
-                                   const MBBVector &SaveBlocks,
-                                   const MBBVector &RestoreBlocks);
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 
 namespace {
+
 class PEI : public MachineFunctionPass {
 public:
   static char ID;
+
   PEI() : MachineFunctionPass(ID) {
     initializePEIPass(*PassRegistry::getPassRegistry());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  MachineFunctionProperties getRequiredProperties() const override {
-    MachineFunctionProperties MFP;
-    if (UsesCalleeSaves)
-      MFP.set(MachineFunctionProperties::Property::NoVRegs);
-    return MFP;
-  }
-
   /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
   /// frame indexes with appropriate references.
-  ///
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
 private:
-  std::function<void(MachineFunction &MF, RegScavenger *RS,
-                     unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex,
-                     const MBBVector &SaveBlocks,
-                     const MBBVector &RestoreBlocks)>
-      SpillCalleeSavedRegisters;
-  std::function<void(MachineFunction &MF, RegScavenger &RS)>
-      ScavengeFrameVirtualRegs;
-
-  bool UsesCalleeSaves = false;
-
   RegScavenger *RS;
 
   // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
@@ -108,8 +114,12 @@ private:
   // FrameIndexVirtualScavenging is used.
   bool FrameIndexEliminationScavenging;
 
+  // Emit remarks.
+  MachineOptimizationRemarkEmitter *ORE = nullptr;
+
   void calculateCallFrameInfo(MachineFunction &Fn);
   void calculateSaveRestoreBlocks(MachineFunction &Fn);
+  void spillCalleeSavedRegs(MachineFunction &MF);
 
   void calculateFrameObjectOffsets(MachineFunction &Fn);
   void replaceFrameIndices(MachineFunction &Fn);
@@ -117,9 +127,11 @@ private:
                            int &SPAdj);
   void insertPrologEpilogCode(MachineFunction &Fn);
 };
-} // namespace
+
+} // end anonymous namespace
 
 char PEI::ID = 0;
+
 char &llvm::PrologEpilogCodeInserterID = PEI::ID;
 
 static cl::opt<unsigned>
@@ -132,6 +144,7 @@ INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
 INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
                     "Prologue/Epilogue Insertion & Frame Finalization", false,
                     false)
@@ -148,32 +161,17 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<MachineLoopInfo>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<StackProtector>();
+  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-
 /// StackObjSet - A set of stack object indexes
-typedef SmallSetVector<int, 8> StackObjSet;
+using StackObjSet = SmallSetVector<int, 8>;
 
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
-///
 bool PEI::runOnMachineFunction(MachineFunction &Fn) {
-  if (!SpillCalleeSavedRegisters) {
-    const TargetMachine &TM = Fn.getTarget();
-    if (!TM.usesPhysRegsForPEI()) {
-      SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
-                                     unsigned &, unsigned &, const MBBVector &,
-                                     const MBBVector &) {};
-      ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
-    } else {
-      SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
-      ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
-      UsesCalleeSaves = true;
-    }
-  }
-
-  const Function* F = Fn.getFunction();
+  const Function &F = Fn.getFunction();
   const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
   const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
 
@@ -181,6 +179,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
   FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
     TRI->requiresFrameIndexReplacementScavenging(Fn);
+  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
 
   // Calculate the MaxCallFrameSize and AdjustsStack variables for the
   // function's frame information. Also eliminates call frame pseudo
@@ -192,8 +191,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   calculateSaveRestoreBlocks(Fn);
 
   // Handle CSR spilling and restoring, for targets that need it.
-  SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
-                            SaveBlocks, RestoreBlocks);
+  if (Fn.getTarget().usesPhysRegsForPEI())
+    spillCalleeSavedRegs(Fn);
 
   // Allow the target machine to make final modifications to the function
   // before the frame layout is finalized.
@@ -207,7 +206,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   // called functions.  Because of this, calculateCalleeSavedRegisters()
   // must be called before this function in order to set the AdjustsStack
   // and MaxCallFrameSize variables.
-  if (!F->hasFnAttribute(Attribute::Naked))
+  if (!F.hasFnAttribute(Attribute::Naked))
     insertPrologEpilogCode(Fn);
 
   // Replace all MO_FrameIndex operands with physical register references
@@ -218,19 +217,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   // If register scavenging is needed, as we've enabled doing it as a
   // post-pass, scavenge the virtual registers that frame index elimination
   // inserted.
-  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
-      ScavengeFrameVirtualRegs(Fn, *RS);
-
-      // Clear any vregs created by virtual scavenging.
-      Fn.getRegInfo().clearVirtRegs();
-  }
+  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
+    scavengeFrameVirtualRegs(Fn, *RS);
 
   // Warn on stack size when we exceeds the given limit.
   MachineFrameInfo &MFI = Fn.getFrameInfo();
   uint64_t StackSize = MFI.getStackSize();
   if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
-    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
-    F->getContext().diagnose(DiagStackSize);
+    DiagnosticInfoStackSize DiagStackSize(F, StackSize);
+    F.getContext().diagnose(DiagStackSize);
   }
 
   delete RS;
@@ -459,87 +454,63 @@ static void updateLiveness(MachineFunction &MF) {
   }
 }
 
-/// insertCSRSpillsAndRestores - Insert spill and restore code for
-/// callee saved registers used in the function.
-///
-static void insertCSRSpillsAndRestores(MachineFunction &Fn,
-                                       const MBBVector &SaveBlocks,
-                                       const MBBVector &RestoreBlocks) {
-  // Get callee saved register information.
-  MachineFrameInfo &MFI = Fn.getFrameInfo();
-  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-
-  MFI.setCalleeSavedInfoValid(true);
-
-  // Early exit if no callee saved registers are modified!
-  if (CSI.empty())
-    return;
-
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+                           ArrayRef<CalleeSavedInfo> CSI) {
+  MachineFunction &Fn = *SaveBlock.getParent();
   const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
-  MachineBasicBlock::iterator I;
-
-  // Spill using target interface.
-  for (MachineBasicBlock *SaveBlock : SaveBlocks) {
-    I = SaveBlock->begin();
-    if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
-      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-        // Insert the spill to the stack frame.
-        unsigned Reg = CSI[i].getReg();
-        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-        TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
-                                RC, TRI);
-      }
+
+  MachineBasicBlock::iterator I = SaveBlock.begin();
+  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+    for (const CalleeSavedInfo &CS : CSI) {
+      // Insert the spill to the stack frame.
+      unsigned Reg = CS.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+                              TRI);
     }
-    // Update the live-in information of all the blocks up to the save point.
-    updateLiveness(Fn);
   }
+}
 
-  // Restore using target interface.
-  for (MachineBasicBlock *MBB : RestoreBlocks) {
-    I = MBB->end();
-
-    // Skip over all terminator instructions, which are part of the return
-    // sequence.
-    MachineBasicBlock::iterator I2 = I;
-    while (I2 != MBB->begin() && (--I2)->isTerminator())
-      I = I2;
-
-    bool AtStart = I == MBB->begin();
-    MachineBasicBlock::iterator BeforeI = I;
-    if (!AtStart)
-      --BeforeI;
-
-    // Restore all registers immediately before the return and any
-    // terminators that precede it.
-    if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
-      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-        unsigned Reg = CSI[i].getReg();
-        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-        TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
-        assert(I != MBB->begin() &&
-               "loadRegFromStackSlot didn't insert any code!");
-        // Insert in reverse order.  loadRegFromStackSlot can insert
-        // multiple instructions.
-        if (AtStart)
-          I = MBB->begin();
-        else {
-          I = BeforeI;
-          ++I;
-        }
-      }
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+                              std::vector<CalleeSavedInfo> &CSI) {
+  MachineFunction &Fn = *RestoreBlock.getParent();
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+
+  // Restore all registers immediately before the return and any
+  // terminators that precede it.
+  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+    for (const CalleeSavedInfo &CI : reverse(CSI)) {
+      unsigned Reg = CI.getReg();
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+      assert(I != RestoreBlock.begin() &&
+             "loadRegFromStackSlot didn't insert any code!");
+      // Insert in reverse order.  loadRegFromStackSlot can insert
+      // multiple instructions.
     }
   }
 }
 
-static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
-                                   unsigned &MinCSFrameIndex,
-                                   unsigned &MaxCSFrameIndex,
-                                   const MBBVector &SaveBlocks,
-                                   const MBBVector &RestoreBlocks) {
-  const Function *F = Fn.getFunction();
+void PEI::spillCalleeSavedRegs(MachineFunction &Fn) {
+  // We can't list this requirement in getRequiredProperties because some
+  // targets (WebAssembly) use virtual registers past this point, and the pass
+  // pipeline is set up without giving the passes a chance to look at the
+  // TargetMachine.
+  // FIXME: Find a way to express this in getRequiredProperties.
+  assert(Fn.getProperties().hasProperty(
+      MachineFunctionProperties::Property::NoVRegs));
+
+  const Function &F = Fn.getFunction();
   const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  MachineFrameInfo &MFI = Fn.getFrameInfo();
   MinCSFrameIndex = std::numeric_limits<unsigned>::max();
   MaxCSFrameIndex = 0;
 
@@ -551,8 +522,21 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
   assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
 
   // Add the code to save and restore the callee saved registers.
-  if (!F->hasFnAttribute(Attribute::Naked))
-    insertCSRSpillsAndRestores(Fn, SaveBlocks, RestoreBlocks);
+  if (!F.hasFnAttribute(Attribute::Naked)) {
+    MFI.setCalleeSavedInfoValid(true);
+
+    std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+    if (!CSI.empty()) {
+      for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+        insertCSRSaves(*SaveBlock, CSI);
+        // Update the live-in information of all the blocks up to the save
+        // point.
+        updateLiveness(Fn);
+      }
+      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+        insertCSRRestores(*RestoreBlock, CSI);
+    }
+  }
 }
 
 /// AdjustStackOffset - Helper function used to adjust the stack frame offset.
@@ -585,7 +569,6 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
 
 /// Compute which bytes of fixed and callee-save stack area are unused and keep
 /// track of them in StackBytesFree.
-///
 static inline void
 computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
                       unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
@@ -626,7 +609,6 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
 
 /// Assign frame object to an unused portion of the stack in the fixed stack
 /// object range.  Return true if the allocation was successful.
-///
 static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
                                      bool StackGrowsDown, unsigned MaxAlign,
                                      BitVector &StackBytesFree) {
@@ -703,7 +685,6 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
 
 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
 /// abstract stack objects.
-///
 void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
   StackProtector *SP = &getAnalysis<StackProtector>();
@@ -825,7 +806,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   }
 
   // Retrieve the Exception Handler registration node.
-  int EHRegNodeFrameIndex = INT_MAX;
+  int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
   if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo())
     EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;
 
@@ -903,7 +884,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   }
 
   // Allocate the EH registration node first if one is present.
-  if (EHRegNodeFrameIndex != INT_MAX)
+  if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
     AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
                       MaxAlign, Skew);
 
@@ -968,12 +949,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   int64_t StackSize = Offset - LocalAreaOffset;
   MFI.setStackSize(StackSize);
   NumBytesStackSpace += StackSize;
+
+  ORE->emit([&]() {
+    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
+                                             Fn.getFunction().getSubprogram(),
+                                             &Fn.front())
+           << ore::NV("NumStackBytes", StackSize) << " stack bytes in function";
+  });
 }
 
 /// insertPrologEpilogCode - Scan the function for modified callee saved
 /// registers, insert spill code for these callee saved registers, then add
 /// prolog and epilog code to the function.
-///
 void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
   const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
 
@@ -995,21 +982,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
   if (Fn.shouldSplitStack()) {
     for (MachineBasicBlock *SaveBlock : SaveBlocks)
       TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
-  }
+    // Record that there are split-stack functions, so we will emit a
+    // special section to tell the linker.
+    Fn.getMMI().setHasSplitStack(true);
+  } else
+    Fn.getMMI().setHasNosplitStack(true);
 
   // Emit additional code that is required to explicitly handle the stack in
   // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
   // approach is rather similar to that of Segmented Stacks, but it uses a
   // different conditional check and another BIF for allocating more stack
   // space.
-  if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+  if (Fn.getFunction().getCallingConv() == CallingConv::HiPE)
     for (MachineBasicBlock *SaveBlock : SaveBlocks)
       TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
 }
 
 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
 /// register references and actual offsets.
-///
 void PEI::replaceFrameIndices(MachineFunction &Fn) {
   const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
   if (!TFI.needsFrameIndexResolution(Fn)) return;
@@ -1059,7 +1049,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
   bool InsideCallSequence = false;
 
   for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
-
     if (TII.isFrameInstr(*I)) {
       InsideCallSequence = TII.isFrameSetup(*I);
       SPAdj += TII.getSPAdjust(*I);
@@ -1081,11 +1070,12 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
         assert(i == 0 && "Frame indices can only appear as the first "
                          "operand of a DBG_VALUE machine instruction");
         unsigned Reg;
-        MachineOperand &Offset = MI.getOperand(1);
-        Offset.setImm(
-            Offset.getImm() +
-            TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg));
+        int64_t Offset =
+            TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg);
         MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+        auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
+                                             DIExpression::NoDeref, Offset);
+        MI.getOperand(3).setMetadata(DIExpr);
         continue;
       }
 
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index b29e62bf1aa3..86fd87450521 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -24,7 +25,11 @@ static const char *const PSVNames[] = {
     "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
     "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
 
-PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII)
+    : Kind(Kind) {
+  AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
+}
+
 
 PseudoSourceValue::~PseudoSourceValue() {}
 
@@ -75,8 +80,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
   OS << "FixedStack" << FI;
 }
 
-CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
-    : PseudoSourceValue(Kind) {}
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
+    PSVKind Kind, const TargetInstrInfo &TII)
+    : PseudoSourceValue(Kind, TII) {}
 
 bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
   return false;
@@ -91,16 +97,20 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
 }
 
 GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
-    const GlobalValue *GV)
-    : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
-
-ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
-    : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
-
-PseudoSourceValueManager::PseudoSourceValueManager()
-    : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT),
-      JumpTablePSV(PseudoSourceValue::JumpTable),
-      ConstantPoolPSV(PseudoSourceValue::ConstantPool) {}
+    const GlobalValue *GV,
+    const TargetInstrInfo &TII)
+    : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {}
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(
+    const char *ES, const TargetInstrInfo &TII)
+    : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {}
+
+PseudoSourceValueManager::PseudoSourceValueManager(
+    const TargetInstrInfo &TIInfo)
+    : TII(TIInfo),
+      StackPSV(PseudoSourceValue::Stack, TII),
+      GOTPSV(PseudoSourceValue::GOT, TII),
+      JumpTablePSV(PseudoSourceValue::JumpTable, TII),
+      ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {}
 
 const PseudoSourceValue *PseudoSourceValueManager::getStack() {
   return &StackPSV;
@@ -116,10 +126,11 @@ const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
   return &JumpTablePSV;
 }
 
-const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) {
+const PseudoSourceValue *
+PseudoSourceValueManager::getFixedStack(int FI) {
   std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
   if (!V)
-    V = llvm::make_unique<FixedStackPseudoSourceValue>(FI);
+    V = llvm::make_unique<FixedStackPseudoSourceValue>(FI, TII);
   return V.get();
 }
 
@@ -128,7 +139,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
   std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
       GlobalCallEntries[GV];
   if (!E)
-    E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV);
+    E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
   return E.get();
 }
 
@@ -137,6 +148,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
   std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
       ExternalCallEntries[ES];
   if (!E)
-    E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES);
+    E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
   return E.get();
 }
diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt
index 8f19e432ab79..2fcbd1280da4 100644
--- a/lib/CodeGen/README.txt
+++ b/lib/CodeGen/README.txt
@@ -33,7 +33,7 @@ It also increase the likelihood the store may become dead.
 bb27 ...
         ...
         %reg1037 = ADDri %reg1039, 1
-        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+        %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10
     Successors according to CFG: 0x8b03bf0 (#5)
 
 bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 7b4fbace2c1c..74c1592634aa 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//===- RegAllocBase.cpp - Register Allocator Base Class -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,19 +14,22 @@
 
 #include "RegAllocBase.h"
 #include "Spiller.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
 
 using namespace llvm;
 
@@ -37,8 +40,8 @@ STATISTIC(NumNewQueued    , "Number of new live ranges queued");
 // Temporary verification option until we can put verification inside
 // MachineVerifier.
 static cl::opt<bool, true>
-VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
-               cl::desc("Verify during register allocation"));
+    VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+                   cl::Hidden, cl::desc("Verify during register allocation"));
 
 const char RegAllocBase::TimerGroupName[] = "regalloc";
 const char RegAllocBase::TimerGroupDescription[] = "Register Allocation";
@@ -103,7 +106,9 @@ void RegAllocBase::allocatePhysRegs() {
     DEBUG(dbgs() << "\nselectOrSplit "
           << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
           << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
-    typedef SmallVector<unsigned, 4> VirtRegVec;
+
+    using VirtRegVec = SmallVector<unsigned, 4>;
+
     VirtRegVec SplitVRegs;
     unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
 
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index d8921b5ce6db..686ffc36e049 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -1,4 +1,4 @@
-//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -37,17 +37,20 @@
 #ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H
 #define LLVM_LIB_CODEGEN_REGALLOCBASE_H
 
-#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 
 namespace llvm {
 
-template<typename T> class SmallVectorImpl;
-class TargetRegisterInfo;
-class VirtRegMap;
+class LiveInterval;
 class LiveIntervals;
 class LiveRegMatrix;
+class MachineInstr;
+class MachineRegisterInfo;
+template<typename T> class SmallVectorImpl;
 class Spiller;
+class TargetRegisterInfo;
+class VirtRegMap;
 
 /// RegAllocBase provides the register allocation driver and interface that can
 /// be extended to add interesting heuristics.
@@ -57,12 +60,13 @@ class Spiller;
 /// assignment order.
 class RegAllocBase {
   virtual void anchor();
+
 protected:
-  const TargetRegisterInfo *TRI;
-  MachineRegisterInfo *MRI;
-  VirtRegMap *VRM;
-  LiveIntervals *LIS;
-  LiveRegMatrix *Matrix;
+  const TargetRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  VirtRegMap *VRM = nullptr;
+  LiveIntervals *LIS = nullptr;
+  LiveRegMatrix *Matrix = nullptr;
   RegisterClassInfo RegClassInfo;
 
   /// Inst which is a def of an original reg and whose defs are already all
@@ -71,10 +75,8 @@ protected:
   /// always available for the remat of all the siblings of the original reg.
   SmallPtrSet<MachineInstr *, 32> DeadRemats;
 
-  RegAllocBase()
-    : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
-
-  virtual ~RegAllocBase() {}
+  RegAllocBase() = default;
+  virtual ~RegAllocBase() = default;
 
   // A RegAlloc pass should call this before allocatePhysRegs.
   void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
@@ -120,4 +122,4 @@ private:
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_REGALLOCBASE_H
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 774306154a89..6e273277804b 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -18,7 +18,7 @@
 #include "Spiller.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -29,11 +29,11 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/PassAnalysisSupport.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cstdlib>
 #include <queue>
 
@@ -143,14 +143,17 @@ INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
                     false)
 
 bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+  LiveInterval &LI = LIS->getInterval(VirtReg);
   if (VRM->hasPhys(VirtReg)) {
-    LiveInterval &LI = LIS->getInterval(VirtReg);
     Matrix->unassign(LI);
     aboutToRemoveInterval(LI);
     return true;
   }
   // Unassigned virtreg is probably in the priority queue.
   // RegAllocBase will erase it after dequeueing.
+  // Nonetheless, clear the live-range so that the debug
+  // dump will show the right state for that VirtReg.
+  LI.clear();
   return false;
 }
 
@@ -216,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
       Intfs.push_back(Intf);
     }
   }
-  DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
-        " interferences with " << VirtReg << "\n");
+  DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI)
+               << " interferences with " << VirtReg << "\n");
   assert(!Intfs.empty() && "expected interference");
 
   // Spill each interfering vreg allocated to PhysReg or an alias.
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index d5538be4bba2..6a5282cbbbff 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//===- RegAllocFast.cpp - A fast register allocator for debug code --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,32 +7,47 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This register allocator allocates registers to a basic block at a time,
-// attempting to keep values in registers and reusing registers as appropriate.
+/// \file This register allocator allocates registers to a basic block at a
+/// time, attempting to keep values in registers and reusing registers as
+/// appropriate.
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <tuple>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "regalloc"
@@ -45,109 +60,106 @@ static RegisterRegAlloc
   fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
 
 namespace {
-  class RAFast : public MachineFunctionPass {
+
+  class RegAllocFast : public MachineFunctionPass {
   public:
     static char ID;
-    RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
-               isBulkSpilling(false) {}
+
+    RegAllocFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
 
   private:
-    MachineFunction *MF;
+    MachineFrameInfo *MFI;
     MachineRegisterInfo *MRI;
     const TargetRegisterInfo *TRI;
     const TargetInstrInfo *TII;
     RegisterClassInfo RegClassInfo;
 
-    // Basic block currently being allocated.
+    /// Basic block currently being allocated.
     MachineBasicBlock *MBB;
 
-    // StackSlotForVirtReg - Maps virtual regs to the frame index where these
-    // values are spilled.
+    /// Maps virtual regs to the frame index where these values are spilled.
     IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
 
-    // Everything we know about a live virtual register.
+    /// Everything we know about a live virtual register.
     struct LiveReg {
-      MachineInstr *LastUse;    // Last instr to use reg.
-      unsigned VirtReg;         // Virtual register number.
-      unsigned PhysReg;         // Currently held here.
-      unsigned short LastOpNum; // OpNum on LastUse.
-      bool Dirty;               // Register needs spill.
+      MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
+      unsigned VirtReg;                ///< Virtual register number.
+      MCPhysReg PhysReg = 0;           ///< Currently held here.
+      unsigned short LastOpNum = 0;    ///< OpNum on LastUse.
+      bool Dirty = false;              ///< Register needs spill.
 
-      explicit LiveReg(unsigned v)
-        : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){}
+      explicit LiveReg(unsigned v) : VirtReg(v) {}
 
       unsigned getSparseSetIndex() const {
         return TargetRegisterInfo::virtReg2Index(VirtReg);
       }
     };
 
-    typedef SparseSet<LiveReg> LiveRegMap;
+    using LiveRegMap = SparseSet<LiveReg>;
 
-    // LiveVirtRegs - This map contains entries for each virtual register
-    // that is currently available in a physical register.
+    /// This map contains entries for each virtual register that is currently
+    /// available in a physical register.
     LiveRegMap LiveVirtRegs;
 
-    DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+    DenseMap<unsigned, SmallVector<MachineInstr *, 4>> LiveDbgValueMap;
 
-    // RegState - Track the state of a physical register.
+    /// Track the state of a physical register.
     enum RegState {
-      // A disabled register is not available for allocation, but an alias may
-      // be in use. A register can only be moved out of the disabled state if
-      // all aliases are disabled.
+      /// A disabled register is not available for allocation, but an alias may
+      /// be in use. A register can only be moved out of the disabled state if
+      /// all aliases are disabled.
       regDisabled,
 
-      // A free register is not currently in use and can be allocated
-      // immediately without checking aliases.
+      /// A free register is not currently in use and can be allocated
+      /// immediately without checking aliases.
       regFree,
 
-      // A reserved register has been assigned explicitly (e.g., setting up a
-      // call parameter), and it remains reserved until it is used.
+      /// A reserved register has been assigned explicitly (e.g., setting up a
+      /// call parameter), and it remains reserved until it is used.
       regReserved
 
-      // A register state may also be a virtual register number, indication that
-      // the physical register is currently allocated to a virtual register. In
-      // that case, LiveVirtRegs contains the inverse mapping.
+      /// A register state may also be a virtual register number, indication
+      /// that the physical register is currently allocated to a virtual
+      /// register. In that case, LiveVirtRegs contains the inverse mapping.
     };
 
-    // PhysRegState - One of the RegState enums, or a virtreg.
+    /// One of the RegState enums, or a virtreg.
     std::vector<unsigned> PhysRegState;
 
-    // Set of register units.
-    typedef SparseSet<unsigned> UsedInInstrSet;
+    SmallVector<unsigned, 16> VirtDead;
+    SmallVector<MachineInstr *, 32> Coalesced;
+
+    /// Set of register units.
+    using UsedInInstrSet = SparseSet<unsigned>;
 
-    // Set of register units that are used in the current instruction, and so
-    // cannot be allocated.
+    /// Set of register units that are used in the current instruction, and so
+    /// cannot be allocated.
     UsedInInstrSet UsedInInstr;
 
-    // Mark a physreg as used in this instruction.
-    void markRegUsedInInstr(unsigned PhysReg) {
+    /// Mark a physreg as used in this instruction.
+    void markRegUsedInInstr(MCPhysReg PhysReg) {
       for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
         UsedInInstr.insert(*Units);
     }
 
-    // Check if a physreg or any of its aliases are used in this instruction.
-    bool isRegUsedInInstr(unsigned PhysReg) const {
+    /// Check if a physreg or any of its aliases are used in this instruction.
+    bool isRegUsedInInstr(MCPhysReg PhysReg) const {
       for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
         if (UsedInInstr.count(*Units))
           return true;
       return false;
     }
 
-    // SkippedInstrs - Descriptors of instructions whose clobber list was
-    // ignored because all registers were spilled. It is still necessary to
-    // mark all the clobbered registers as used by the function.
-    SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
-
-    // isBulkSpilling - This flag is set when LiveRegMap will be cleared
-    // completely after spilling all live registers. LiveRegMap entries should
-    // not be erased.
-    bool isBulkSpilling;
+    /// This flag is set when LiveRegMap will be cleared completely after
+    /// spilling all live registers. LiveRegMap entries should not be erased.
+    bool isBulkSpilling = false;
 
     enum : unsigned {
       spillClean = 1,
       spillDirty = 100,
       spillImpossible = ~0u
     };
+
   public:
     StringRef getPassName() const override { return "Fast Register Allocator"; }
 
@@ -168,29 +180,32 @@ namespace {
 
   private:
     bool runOnMachineFunction(MachineFunction &Fn) override;
-    void AllocateBasicBlock();
-    void handleThroughOperands(MachineInstr *MI,
+    void allocateBasicBlock(MachineBasicBlock &MBB);
+    void handleThroughOperands(MachineInstr &MI,
                                SmallVectorImpl<unsigned> &VirtDead);
-    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
-    bool isLastUseOfLocalReg(MachineOperand&);
+    int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass &RC);
+    bool isLastUseOfLocalReg(const MachineOperand &MO) const;
 
-    void addKillFlag(const LiveReg&);
-    void killVirtReg(LiveRegMap::iterator);
+    void addKillFlag(const LiveReg &LRI);
+    void killVirtReg(LiveRegMap::iterator LRI);
     void killVirtReg(unsigned VirtReg);
     void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
     void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
 
-    void usePhysReg(MachineOperand&);
-    void definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState);
-    unsigned calcSpillCost(unsigned PhysReg) const;
-    void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+    void usePhysReg(MachineOperand &MO);
+    void definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, RegState NewState);
+    unsigned calcSpillCost(MCPhysReg PhysReg) const;
+    void assignVirtToPhysReg(LiveReg&, MCPhysReg PhysReg);
+
     LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
       return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
     }
+
     LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
       return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
     }
-    LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+
+    LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, MCPhysReg PhysReg);
     LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
                                       unsigned Hint);
     LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
@@ -198,35 +213,41 @@ namespace {
     LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
                                        unsigned VirtReg, unsigned Hint);
     void spillAll(MachineBasicBlock::iterator MI);
-    bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+    bool setPhysReg(MachineInstr &MI, unsigned OpNum, MCPhysReg PhysReg);
+
+    void dumpState();
   };
-  char RAFast::ID = 0;
-}
 
-INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false)
+} // end anonymous namespace
+
+char RegAllocFast::ID = 0;
+
+INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
+                false)
 
-/// getStackSpaceFor - This allocates space for the specified virtual register
-/// to be held on the stack.
-int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+/// This allocates space for the specified virtual register to be held on the
+/// stack.
+int RegAllocFast::getStackSpaceFor(unsigned VirtReg,
+                                   const TargetRegisterClass &RC) {
   // Find the location Reg would belong...
   int SS = StackSlotForVirtReg[VirtReg];
+  // Already has space allocated?
   if (SS != -1)
-    return SS;          // Already has space allocated?
+    return SS;
 
   // Allocate a new stack object for this spill location...
-  unsigned Size = TRI->getSpillSize(*RC);
-  unsigned Align = TRI->getSpillAlignment(*RC);
-  int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
+  unsigned Size = TRI->getSpillSize(RC);
+  unsigned Align = TRI->getSpillAlignment(RC);
+  int FrameIdx = MFI->CreateSpillStackObject(Size, Align);
 
   // Assign the slot.
   StackSlotForVirtReg[VirtReg] = FrameIdx;
   return FrameIdx;
 }
 
-/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
-/// its virtual register, and it is guaranteed to be a block-local register.
-///
-bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+/// Return true if MO is the only remaining reference to its virtual register,
+/// and it is guaranteed to be a block-local register.
+bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
   // If the register has ever been spilled or reloaded, we conservatively assume
   // it is a global register used in multiple blocks.
   if (StackSlotForVirtReg[MO.getReg()] != -1)
@@ -239,8 +260,8 @@ bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
   return ++I == MRI->reg_nodbg_end();
 }
 
-/// addKillFlag - Set kill flags on last use of a virtual register.
-void RAFast::addKillFlag(const LiveReg &LR) {
+/// Set kill flags on last use of a virtual register.
+void RegAllocFast::addKillFlag(const LiveReg &LR) {
   if (!LR.LastUse) return;
   MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
   if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
@@ -250,7 +271,7 @@ void RAFast::addKillFlag(const LiveReg &LR) {
     // subreg of this register and given we don't track which
     // lanes are actually dead, we cannot insert a kill flag here.
     // Otherwise we may end up in a situation like this:
-    // ... = (MO) physreg:sub1, physreg <implicit-use, kill>
+    // ... = (MO) physreg:sub1, implicit killed physreg
     // ... <== Here we would allow later pass to reuse physreg:sub1
     //         which is potentially wrong.
     // LR:sub0 = ...
@@ -258,8 +279,8 @@ void RAFast::addKillFlag(const LiveReg &LR) {
   }
 }
 
-/// killVirtReg - Mark virtreg as no longer available.
-void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+/// Mark virtreg as no longer available.
+void RegAllocFast::killVirtReg(LiveRegMap::iterator LRI) {
   addKillFlag(*LRI);
   assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
          "Broken RegState mapping");
@@ -269,8 +290,8 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
     LiveVirtRegs.erase(LRI);
 }
 
-/// killVirtReg - Mark virtreg as no longer available.
-void RAFast::killVirtReg(unsigned VirtReg) {
+/// Mark virtreg as no longer available.
+void RegAllocFast::killVirtReg(unsigned VirtReg) {
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
          "killVirtReg needs a virtual register");
   LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -278,9 +299,10 @@ void RAFast::killVirtReg(unsigned VirtReg) {
     killVirtReg(LRI);
 }
 
-/// spillVirtReg - This method spills the value specified by VirtReg into the
-/// corresponding stack slot if needed.
-void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+/// This method spills the value specified by VirtReg into the corresponding
+/// stack slot if needed.
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
+                                unsigned VirtReg) {
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
          "Spilling a physical register is illegal!");
   LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -288,9 +310,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
   spillVirtReg(MI, LRI);
 }
 
-/// spillVirtReg - Do the actual work of spilling.
-void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
-                          LiveRegMap::iterator LRI) {
+/// Do the actual work of spilling.
+void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
+                                LiveRegMap::iterator LRI) {
   LiveReg &LR = *LRI;
   assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
 
@@ -299,12 +321,12 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
     // instruction, not on the spill.
     bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
     LR.Dirty = false;
-    DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
-                 << " in " << PrintReg(LR.PhysReg, TRI));
-    const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+    DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI)
+                 << " in " << printReg(LR.PhysReg, TRI));
+    const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
     int FI = getStackSpaceFor(LRI->VirtReg, RC);
     DEBUG(dbgs() << " to stack slot #" << FI << "\n");
-    TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+    TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
     ++NumStores;   // Update statistics
 
     // If this register is used by DBG_VALUE then insert new DBG_VALUE to
@@ -312,8 +334,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
     // value.
     SmallVectorImpl<MachineInstr *> &LRIDbgValues =
       LiveDbgValueMap[LRI->VirtReg];
-    for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
-      MachineInstr *DBG = LRIDbgValues[li];
+    for (MachineInstr *DBG : LRIDbgValues) {
       MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
       assert(NewDV->getParent() == MBB && "dangling parent pointer");
       (void)NewDV;
@@ -329,32 +350,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
   killVirtReg(LRI);
 }
 
-/// spillAll - Spill all dirty virtregs without killing them.
-void RAFast::spillAll(MachineBasicBlock::iterator MI) {
+/// Spill all dirty virtregs without killing them.
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
   if (LiveVirtRegs.empty()) return;
   isBulkSpilling = true;
   // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
   // of spilling here is deterministic, if arbitrary.
-  for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
-       i != e; ++i)
-    spillVirtReg(MI, i);
+  for (LiveRegMap::iterator I = LiveVirtRegs.begin(), E = LiveVirtRegs.end();
+       I != E; ++I)
+    spillVirtReg(MI, I);
   LiveVirtRegs.clear();
   isBulkSpilling = false;
 }
 
-/// usePhysReg - Handle the direct use of a physical register.
-/// Check that the register is not used by a virtreg.
-/// Kill the physreg, marking it free.
-/// This may add implicit kills to MO->getParent() and invalidate MO.
-void RAFast::usePhysReg(MachineOperand &MO) {
-  unsigned PhysReg = MO.getReg();
-  assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
-         "Bad usePhysReg operand");
-
+/// Handle the direct use of a physical register.  Check that the register is
+/// not used by a virtreg. Kill the physreg, marking it free. This may add
+/// implicit kills to MO->getParent() and invalidate MO.
+void RegAllocFast::usePhysReg(MachineOperand &MO) {
   // Ignore undef uses.
   if (MO.isUndef())
     return;
 
+  unsigned PhysReg = MO.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+         "Bad usePhysReg operand");
+
   markRegUsedInInstr(PhysReg);
   switch (PhysRegState[PhysReg]) {
   case regDisabled:
@@ -373,7 +393,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
 
   // Maybe a superregister is reserved?
   for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
-    unsigned Alias = *AI;
+    MCPhysReg Alias = *AI;
     switch (PhysRegState[Alias]) {
     case regDisabled:
       break;
@@ -411,11 +431,11 @@ void RAFast::usePhysReg(MachineOperand &MO) {
   MO.setIsKill();
 }
 
-/// definePhysReg - Mark PhysReg as reserved or free after spilling any
-/// virtregs. This is very similar to defineVirtReg except the physreg is
-/// reserved instead of allocated.
-void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
-                           RegState NewState) {
+/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
+/// similar to defineVirtReg except the physreg is reserved instead of
+/// allocated.
+void RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg PhysReg,
+                                 RegState NewState) {
   markRegUsedInInstr(PhysReg);
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
   case regDisabled:
@@ -432,7 +452,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
   // This is a disabled register, disable all aliases.
   PhysRegState[PhysReg] = NewState;
   for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
-    unsigned Alias = *AI;
+    MCPhysReg Alias = *AI;
     switch (unsigned VirtReg = PhysRegState[Alias]) {
     case regDisabled:
       break;
@@ -449,15 +469,13 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
   }
 }
 
-
-// calcSpillCost - Return the cost of spilling clearing out PhysReg and
-// aliases so it is free for allocation.
-// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
-// can be allocated directly.
-// Returns spillImpossible when PhysReg or an alias can't be spilled.
-unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+/// \brief Return the cost of spilling clearing out PhysReg and aliases so it is
+/// free for allocation. Returns 0 when PhysReg is free or disabled with all
+/// aliases disabled - it can be allocated directly.
+/// \returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
   if (isRegUsedInInstr(PhysReg)) {
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+    DEBUG(dbgs() << printReg(PhysReg, TRI) << " is already used in instr.\n");
     return spillImpossible;
   }
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -466,8 +484,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
   case regFree:
     return 0;
   case regReserved:
-    DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
-                 << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+    DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
+                 << printReg(PhysReg, TRI) << " is reserved already.\n");
     return spillImpossible;
   default: {
     LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
@@ -477,10 +495,10 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
   }
 
   // This is a disabled register, add up cost of aliases.
-  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+  DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
   unsigned Cost = 0;
   for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
-    unsigned Alias = *AI;
+    MCPhysReg Alias = *AI;
     switch (unsigned VirtReg = PhysRegState[Alias]) {
     case regDisabled:
       break;
@@ -500,45 +518,37 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
   return Cost;
 }
 
-
-/// assignVirtToPhysReg - This method updates local state so that we know
-/// that PhysReg is the proper container for VirtReg now.  The physical
-/// register must not be used for anything else when this is called.
-///
-void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
-  DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
-               << PrintReg(PhysReg, TRI) << "\n");
+/// \brief This method updates local state so that we know that PhysReg is the
+/// proper container for VirtReg now.  The physical register must not be used
+/// for anything else when this is called.
+void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
+  DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to "
+               << printReg(PhysReg, TRI) << "\n");
   PhysRegState[PhysReg] = LR.VirtReg;
   assert(!LR.PhysReg && "Already assigned a physreg");
   LR.PhysReg = PhysReg;
 }
 
-RAFast::LiveRegMap::iterator
-RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+RegAllocFast::LiveRegMap::iterator
+RegAllocFast::assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg) {
   LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
   assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
   assignVirtToPhysReg(*LRI, PhysReg);
   return LRI;
 }
 
-/// allocVirtReg - Allocate a physical register for VirtReg.
-RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
-                                                  LiveRegMap::iterator LRI,
-                                                  unsigned Hint) {
+/// Allocates a physical register for VirtReg.
+RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,
+    LiveRegMap::iterator LRI, unsigned Hint) {
   const unsigned VirtReg = LRI->VirtReg;
 
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
          "Can only allocate virtual registers");
 
-  const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-
-  // Ignore invalid hints.
-  if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
-               !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
-    Hint = 0;
-
   // Take hint when possible.
-  if (Hint) {
+  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+  if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
+      MRI->isAllocatable(Hint) && RC.contains(Hint)) {
     // Ignore the hint if we would have to spill a dirty register.
     unsigned Cost = calcSpillCost(Hint);
     if (Cost < spillDirty) {
@@ -550,33 +560,32 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
     }
   }
 
-  ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
-
   // First try to find a completely free register.
-  for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
-    unsigned PhysReg = *I;
+  ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(&RC);
+  for (MCPhysReg PhysReg : AO) {
     if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
       assignVirtToPhysReg(*LRI, PhysReg);
       return LRI;
     }
   }
 
-  DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
-               << TRI->getRegClassName(RC) << "\n");
+  DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from "
+               << TRI->getRegClassName(&RC) << "\n");
 
-  unsigned BestReg = 0, BestCost = spillImpossible;
-  for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
-    unsigned Cost = calcSpillCost(*I);
-    DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+  unsigned BestReg = 0;
+  unsigned BestCost = spillImpossible;
+  for (MCPhysReg PhysReg : AO) {
+    unsigned Cost = calcSpillCost(PhysReg);
+    DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n");
     DEBUG(dbgs() << "\tCost: " << Cost << "\n");
     DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
     // Cost is 0 when all aliases are already disabled.
     if (Cost == 0) {
-      assignVirtToPhysReg(*LRI, *I);
+      assignVirtToPhysReg(*LRI, PhysReg);
       return LRI;
     }
     if (Cost < BestCost)
-      BestReg = *I, BestCost = Cost;
+      BestReg = PhysReg, BestCost = Cost;
   }
 
   if (BestReg) {
@@ -595,11 +604,11 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
   return assignVirtToPhysReg(VirtReg, *AO.begin());
 }
 
-/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
-RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI,
-                                                   unsigned OpNum,
-                                                   unsigned VirtReg,
-                                                   unsigned Hint) {
+/// Allocates a register for VirtReg and mark it as dirty.
+RegAllocFast::LiveRegMap::iterator RegAllocFast::defineVirtReg(MachineInstr &MI,
+                                                               unsigned OpNum,
+                                                               unsigned VirtReg,
+                                                               unsigned Hint) {
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
          "Not a virtual register");
   LiveRegMap::iterator LRI;
@@ -629,11 +638,11 @@ RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI,
   return LRI;
 }
 
-/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
-RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
-                                                   unsigned OpNum,
-                                                   unsigned VirtReg,
-                                                   unsigned Hint) {
+/// Make sure VirtReg is available in a physreg and return it.
+RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,
+                                                               unsigned OpNum,
+                                                               unsigned VirtReg,
+                                                               unsigned Hint) {
   assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
          "Not a virtual register");
   LiveRegMap::iterator LRI;
@@ -642,11 +651,11 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
   MachineOperand &MO = MI.getOperand(OpNum);
   if (New) {
     LRI = allocVirtReg(MI, LRI, Hint);
-    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
     int FrameIndex = getStackSpaceFor(VirtReg, RC);
-    DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
-                 << PrintReg(LRI->PhysReg, TRI) << "\n");
-    TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
+    DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
+                 << printReg(LRI->PhysReg, TRI) << "\n");
+    TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
     ++NumLoads;
   } else if (LRI->Dirty) {
     if (isLastUseOfLocalReg(MO)) {
@@ -665,7 +674,7 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
   } else if (MO.isKill()) {
     // We must remove kill flags from uses of reloaded registers because the
     // register would be killed immediately, and there might be a second use:
-    //   %foo = OR %x<kill>, %x
+    //   %foo = OR killed %x, %x
     // This would cause a second reload of %x into a different register.
     DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
     MO.setIsKill(false);
@@ -680,241 +689,236 @@ RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
   return LRI;
 }
 
-// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering
-// subregs. This may invalidate any operand pointers.
-// Return true if the operand kills its register.
-bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
-  MachineOperand &MO = MI->getOperand(OpNum);
+/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This
+/// may invalidate any operand pointers.  Return true if the operand kills its
+/// register.
+bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,
+                              MCPhysReg PhysReg) {
+  MachineOperand &MO = MI.getOperand(OpNum);
   bool Dead = MO.isDead();
   if (!MO.getSubReg()) {
     MO.setReg(PhysReg);
+    MO.setIsRenamableIfNoExtraRegAllocReq();
     return MO.isKill() || Dead;
   }
 
   // Handle subregister index.
   MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+  MO.setIsRenamableIfNoExtraRegAllocReq();
   MO.setSubReg(0);
 
   // A kill flag implies killing the full register. Add corresponding super
   // register kill.
   if (MO.isKill()) {
-    MI->addRegisterKilled(PhysReg, TRI, true);
+    MI.addRegisterKilled(PhysReg, TRI, true);
     return true;
   }
 
   // A <def,read-undef> of a sub-register requires an implicit def of the full
   // register.
   if (MO.isDef() && MO.isUndef())
-    MI->addRegisterDefined(PhysReg, TRI);
+    MI.addRegisterDefined(PhysReg, TRI);
 
   return Dead;
 }
 
-// Handle special instruction operand like early clobbers and tied ops when
+// Handles special instruction operand like early clobbers and tied ops when
 // there are additional physreg defines.
-void RAFast::handleThroughOperands(MachineInstr *MI,
-                                   SmallVectorImpl<unsigned> &VirtDead) {
+void RegAllocFast::handleThroughOperands(MachineInstr &MI,
+                                         SmallVectorImpl<unsigned> &VirtDead) {
   DEBUG(dbgs() << "Scanning for through registers:");
   SmallSet<unsigned, 8> ThroughRegs;
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+  for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (!TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
-    if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
-        (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+    if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
+        (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
       if (ThroughRegs.insert(Reg).second)
-        DEBUG(dbgs() << ' ' << PrintReg(Reg));
+        DEBUG(dbgs() << ' ' << printReg(Reg));
     }
   }
 
   // If any physreg defines collide with preallocated through registers,
   // we must spill and reallocate.
   DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+  for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || !MO.isDef()) continue;
     unsigned Reg = MO.getReg();
     if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
     markRegUsedInInstr(Reg);
     for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
       if (ThroughRegs.count(PhysRegState[*AI]))
-        definePhysReg(*MI, *AI, regFree);
+        definePhysReg(MI, *AI, regFree);
     }
   }
 
   SmallVector<unsigned, 8> PartialDefs;
   DEBUG(dbgs() << "Allocating tied uses.\n");
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
     if (MO.isUse()) {
-      unsigned DefIdx = 0;
-      if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
-      DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
-        << DefIdx << ".\n");
-      LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
-      unsigned PhysReg = LRI->PhysReg;
-      setPhysReg(MI, i, PhysReg);
+      if (!MO.isTied()) continue;
+      DEBUG(dbgs() << "Operand " << I << "("<< MO << ") is tied to operand "
+        << MI.findTiedOperandIdx(I) << ".\n");
+      LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
+      MCPhysReg PhysReg = LRI->PhysReg;
+      setPhysReg(MI, I, PhysReg);
       // Note: we don't update the def operand yet. That would cause the normal
       // def-scan to attempt spilling.
-    } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+    } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
       DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
       // Reload the register, but don't assign to the operand just yet.
       // That would confuse the later phys-def processing pass.
-      LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
+      LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);
       PartialDefs.push_back(LRI->PhysReg);
     }
   }
 
   DEBUG(dbgs() << "Allocating early clobbers.\n");
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
     if (!MO.isEarlyClobber())
       continue;
     // Note: defineVirtReg may invalidate MO.
-    LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, 0);
-    unsigned PhysReg = LRI->PhysReg;
-    if (setPhysReg(MI, i, PhysReg))
+    LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, 0);
+    MCPhysReg PhysReg = LRI->PhysReg;
+    if (setPhysReg(MI, I, PhysReg))
       VirtDead.push_back(Reg);
   }
 
   // Restore UsedInInstr to a state usable for allocating normal virtual uses.
   UsedInInstr.clear();
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
+  for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
     unsigned Reg = MO.getReg();
     if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
-    DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+    DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
                  << " as used in instr\n");
     markRegUsedInInstr(Reg);
   }
 
   // Also mark PartialDefs as used to avoid reallocation.
-  for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
-    markRegUsedInInstr(PartialDefs[i]);
+  for (unsigned PartialDef : PartialDefs)
+    markRegUsedInInstr(PartialDef);
+}
+
+#ifndef NDEBUG
+void RegAllocFast::dumpState() {
+  for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+    if (PhysRegState[Reg] == regDisabled) continue;
+    dbgs() << " " << printReg(Reg, TRI);
+    switch(PhysRegState[Reg]) {
+    case regFree:
+      break;
+    case regReserved:
+      dbgs() << "*";
+      break;
+    default: {
+      dbgs() << '=' << printReg(PhysRegState[Reg]);
+      LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+      assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+      if (I->Dirty)
+        dbgs() << "*";
+      assert(I->PhysReg == Reg && "Bad inverse map");
+      break;
+    }
+    }
+  }
+  dbgs() << '\n';
+  // Check that LiveVirtRegs is the inverse.
+  for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+       e = LiveVirtRegs.end(); i != e; ++i) {
+    assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
+           "Bad map key");
+    assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
+           "Bad map value");
+    assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+  }
 }
+#endif
 
-void RAFast::AllocateBasicBlock() {
-  DEBUG(dbgs() << "\nAllocating " << *MBB);
+void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
+  this->MBB = &MBB;
+  DEBUG(dbgs() << "\nAllocating " << MBB);
 
   PhysRegState.assign(TRI->getNumRegs(), regDisabled);
   assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
 
-  MachineBasicBlock::iterator MII = MBB->begin();
+  MachineBasicBlock::iterator MII = MBB.begin();
 
   // Add live-in registers as live.
-  for (const auto &LI : MBB->liveins())
+  for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins())
     if (MRI->isAllocatable(LI.PhysReg))
       definePhysReg(*MII, LI.PhysReg, regReserved);
 
-  SmallVector<unsigned, 8> VirtDead;
-  SmallVector<MachineInstr*, 32> Coalesced;
+  VirtDead.clear();
+  Coalesced.clear();
 
   // Otherwise, sequentially allocate each instruction in the MBB.
-  while (MII != MBB->end()) {
-    MachineInstr *MI = &*MII++;
-    const MCInstrDesc &MCID = MI->getDesc();
-    DEBUG({
-        dbgs() << "\n>> " << *MI << "Regs:";
-        for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
-          if (PhysRegState[Reg] == regDisabled) continue;
-          dbgs() << " " << TRI->getName(Reg);
-          switch(PhysRegState[Reg]) {
-          case regFree:
-            break;
-          case regReserved:
-            dbgs() << "*";
-            break;
-          default: {
-            dbgs() << '=' << PrintReg(PhysRegState[Reg]);
-            LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
-            assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
-            if (I->Dirty)
-              dbgs() << "*";
-            assert(I->PhysReg == Reg && "Bad inverse map");
-            break;
-          }
-          }
-        }
-        dbgs() << '\n';
-        // Check that LiveVirtRegs is the inverse.
-        for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
-             e = LiveVirtRegs.end(); i != e; ++i) {
-           assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
-                  "Bad map key");
-           assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
-                  "Bad map value");
-           assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
-        }
-      });
+  for (MachineInstr &MI : MBB) {
+    const MCInstrDesc &MCID = MI.getDesc();
+    DEBUG(
+      dbgs() << "\n>> " << MI << "Regs:";
+      dumpState()
+    );
 
     // Debug values are not allowed to change codegen in any way.
-    if (MI->isDebugValue()) {
-      bool ScanDbgValue = true;
-      while (ScanDbgValue) {
-        ScanDbgValue = false;
-        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-          MachineOperand &MO = MI->getOperand(i);
-          if (!MO.isReg()) continue;
-          unsigned Reg = MO.getReg();
-          if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
-          LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
-          if (LRI != LiveVirtRegs.end())
-            setPhysReg(MI, i, LRI->PhysReg);
-          else {
-            int SS = StackSlotForVirtReg[Reg];
-            if (SS == -1) {
-              // We can't allocate a physreg for a DebugValue, sorry!
-              DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
-              MO.setReg(0);
-            }
-            else {
-              // Modify DBG_VALUE now that the value is in a spill slot.
-              bool IsIndirect = MI->isIndirectDebugValue();
-              uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
-              const MDNode *Var = MI->getDebugVariable();
-              const MDNode *Expr = MI->getDebugExpression();
-              DebugLoc DL = MI->getDebugLoc();
-              MachineBasicBlock *MBB = MI->getParent();
-              assert(
-                  cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
-                  "Expected inlined-at fields to agree");
-              MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
-                                            TII->get(TargetOpcode::DBG_VALUE))
-                                        .addFrameIndex(SS)
-                                        .addImm(Offset)
-                                        .addMetadata(Var)
-                                        .addMetadata(Expr);
-              DEBUG(dbgs() << "Modifying debug info due to spill:"
-                           << "\t" << *NewDV);
-              // Scan NewDV operands from the beginning.
-              MI = NewDV;
-              ScanDbgValue = true;
-              break;
-            }
-          }
-          LiveDbgValueMap[Reg].push_back(MI);
+    if (MI.isDebugValue()) {
+      MachineInstr *DebugMI = &MI;
+      MachineOperand &MO = DebugMI->getOperand(0);
+
+      // Ignore DBG_VALUEs that aren't based on virtual registers. These are
+      // mostly constants and frame indices.
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+
+      // See if this virtual register has already been allocated to a physical
+      // register or spilled to a stack slot.
+      LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+      if (LRI != LiveVirtRegs.end())
+        setPhysReg(*DebugMI, 0, LRI->PhysReg);
+      else {
+        int SS = StackSlotForVirtReg[Reg];
+        if (SS != -1) {
+          // Modify DBG_VALUE now that the value is in a spill slot.
+          updateDbgValueForSpill(*DebugMI, SS);
+          DEBUG(dbgs() << "Modifying debug info due to spill:"
+                       << "\t" << *DebugMI);
+          continue;
         }
+
+        // We can't allocate a physreg for a DebugValue, sorry!
+        DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+        MO.setReg(0);
       }
-      // Next instruction.
+
+      // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
+      // that future spills of Reg will have DBG_VALUEs.
+      LiveDbgValueMap[Reg].push_back(DebugMI);
       continue;
     }
 
     // If this is a copy, we may be able to coalesce.
-    unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
-    if (MI->isCopy()) {
-      CopyDst = MI->getOperand(0).getReg();
-      CopySrc = MI->getOperand(1).getReg();
-      CopyDstSub = MI->getOperand(0).getSubReg();
-      CopySrcSub = MI->getOperand(1).getSubReg();
+    unsigned CopySrcReg = 0;
+    unsigned CopyDstReg = 0;
+    unsigned CopySrcSub = 0;
+    unsigned CopyDstSub = 0;
+    if (MI.isCopy()) {
+      CopyDstReg = MI.getOperand(0).getReg();
+      CopySrcReg = MI.getOperand(1).getReg();
+      CopyDstSub = MI.getOperand(0).getSubReg();
+      CopySrcSub = MI.getOperand(1).getSubReg();
     }
 
     // Track registers used by instruction.
@@ -928,8 +932,8 @@ void RAFast::AllocateBasicBlock() {
     bool hasEarlyClobbers = false;
     bool hasPartialRedefs = false;
     bool hasPhysDefs = false;
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
       // Make sure MRI knows about registers clobbered by regmasks.
       if (MO.isRegMask()) {
         MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -946,7 +950,7 @@ void RAFast::AllocateBasicBlock() {
         } else {
           if (MO.isEarlyClobber())
             hasEarlyClobbers = true;
-          if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+          if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
             hasPartialRedefs = true;
         }
         continue;
@@ -955,7 +959,7 @@ void RAFast::AllocateBasicBlock() {
       if (MO.isUse()) {
         usePhysReg(MO);
       } else if (MO.isEarlyClobber()) {
-        definePhysReg(*MI, Reg,
+        definePhysReg(MI, Reg,
                       (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
         hasEarlyClobbers = true;
       } else
@@ -971,11 +975,11 @@ void RAFast::AllocateBasicBlock() {
     // sure the same register is allocated to uses and defs.
     // We didn't detect inline asm tied operands above, so just make this extra
     // pass for all inline asm.
-    if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+    if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
         (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
       handleThroughOperands(MI, VirtDead);
       // Don't attempt coalescing when we have funny stuff going on.
-      CopyDst = 0;
+      CopyDstReg = 0;
       // Pretend we have early clobbers so the use operands get marked below.
       // This is not necessary for the common case of a single tied use.
       hasEarlyClobbers = true;
@@ -983,16 +987,16 @@ void RAFast::AllocateBasicBlock() {
 
     // Second scan.
     // Allocate virtreg uses.
-    for (unsigned i = 0; i != VirtOpEnd; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
+    for (unsigned I = 0; I != VirtOpEnd; ++I) {
+      const MachineOperand &MO = MI.getOperand(I);
       if (!MO.isReg()) continue;
       unsigned Reg = MO.getReg();
       if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
       if (MO.isUse()) {
-        LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, CopyDst);
-        unsigned PhysReg = LRI->PhysReg;
-        CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
-        if (setPhysReg(MI, i, PhysReg))
+        LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, CopyDstReg);
+        MCPhysReg PhysReg = LRI->PhysReg;
+        CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
+        if (setPhysReg(MI, I, PhysReg))
           killVirtReg(LRI);
       }
     }
@@ -1001,19 +1005,18 @@ void RAFast::AllocateBasicBlock() {
     // this point.
     UsedInInstr.clear();
     if (hasEarlyClobbers) {
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = MI->getOperand(i);
+      for (const MachineOperand &MO : MI.operands()) {
         if (!MO.isReg()) continue;
         unsigned Reg = MO.getReg();
         if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
         // Look for physreg defs and tied uses.
-        if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+        if (!MO.isDef() && !MO.isTied()) continue;
         markRegUsedInInstr(Reg);
       }
     }
 
-    unsigned DefOpEnd = MI->getNumOperands();
-    if (MI->isCall()) {
+    unsigned DefOpEnd = MI.getNumOperands();
+    if (MI.isCall()) {
       // Spill all virtregs before a call. This serves one purpose: If an
       // exception is thrown, the landing pad is going to expect to find
       // registers in their spill slots.
@@ -1023,99 +1026,92 @@ void RAFast::AllocateBasicBlock() {
       // those for virtual registers in between.
       DEBUG(dbgs() << "  Spilling remaining registers before call.\n");
       spillAll(MI);
-
-      // The imp-defs are skipped below, but we still need to mark those
-      // registers as used by the function.
-      SkippedInstrs.insert(&MCID);
     }
 
     // Third scan.
     // Allocate defs and collect dead defs.
-    for (unsigned i = 0; i != DefOpEnd; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
+    for (unsigned I = 0; I != DefOpEnd; ++I) {
+      const MachineOperand &MO = MI.getOperand(I);
       if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
         continue;
       unsigned Reg = MO.getReg();
 
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
         if (!MRI->isAllocatable(Reg)) continue;
-        definePhysReg(*MI, Reg, MO.isDead() ? regFree : regReserved);
+        definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
         continue;
       }
-      LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, CopySrc);
-      unsigned PhysReg = LRI->PhysReg;
-      if (setPhysReg(MI, i, PhysReg)) {
+      LiveRegMap::iterator LRI = defineVirtReg(MI, I, Reg, CopySrcReg);
+      MCPhysReg PhysReg = LRI->PhysReg;
+      if (setPhysReg(MI, I, PhysReg)) {
         VirtDead.push_back(Reg);
-        CopyDst = 0; // cancel coalescing;
+        CopyDstReg = 0; // cancel coalescing;
       } else
-        CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+        CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
     }
 
     // Kill dead defs after the scan to ensure that multiple defs of the same
     // register are allocated identically. We didn't need to do this for uses
     // because we are crerating our own kill flags, and they are always at the
     // last use.
-    for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
-      killVirtReg(VirtDead[i]);
+    for (unsigned VirtReg : VirtDead)
+      killVirtReg(VirtReg);
     VirtDead.clear();
 
-    if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
-      DEBUG(dbgs() << "-- coalescing: " << *MI);
-      Coalesced.push_back(MI);
+    if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+      DEBUG(dbgs() << "-- coalescing: " << MI);
+      Coalesced.push_back(&MI);
     } else {
-      DEBUG(dbgs() << "<< " << *MI);
+      DEBUG(dbgs() << "<< " << MI);
     }
   }
 
   // Spill all physical registers holding virtual registers now.
   DEBUG(dbgs() << "Spilling live registers at end of block.\n");
-  spillAll(MBB->getFirstTerminator());
+  spillAll(MBB.getFirstTerminator());
 
   // Erase all the coalesced copies. We are delaying it until now because
   // LiveVirtRegs might refer to the instrs.
-  for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
-    MBB->erase(Coalesced[i]);
+  for (MachineInstr *MI : Coalesced)
+    MBB.erase(MI);
   NumCopies += Coalesced.size();
 
-  DEBUG(MBB->dump());
+  DEBUG(MBB.dump());
 }
 
-/// runOnMachineFunction - Register allocate the whole function
-///
-bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+/// Allocates registers for a function.
+bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
-               << "********** Function: " << Fn.getName() << '\n');
-  MF = &Fn;
-  MRI = &MF->getRegInfo();
-  TRI = MF->getSubtarget().getRegisterInfo();
-  TII = MF->getSubtarget().getInstrInfo();
-  MRI->freezeReservedRegs(Fn);
-  RegClassInfo.runOnMachineFunction(Fn);
+               << "********** Function: " << MF.getName() << '\n');
+  MRI = &MF.getRegInfo();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  TRI = STI.getRegisterInfo();
+  TII = STI.getInstrInfo();
+  MFI = &MF.getFrameInfo();
+  MRI->freezeReservedRegs(MF);
+  RegClassInfo.runOnMachineFunction(MF);
   UsedInInstr.clear();
   UsedInInstr.setUniverse(TRI->getNumRegUnits());
 
   // initialize the virtual->physical register map to have a 'null'
   // mapping for all virtual registers
-  StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
-  LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+  unsigned NumVirtRegs = MRI->getNumVirtRegs();
+  StackSlotForVirtReg.resize(NumVirtRegs);
+  LiveVirtRegs.setUniverse(NumVirtRegs);
 
   // Loop over all of the basic blocks, eliminating virtual register references
-  for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
-       MBBi != MBBe; ++MBBi) {
-    MBB = &*MBBi;
-    AllocateBasicBlock();
-  }
+  for (MachineBasicBlock &MBB : MF)
+    allocateBasicBlock(MBB);
 
   // All machine operands and other references to virtual registers have been
   // replaced. Remove the virtual registers.
   MRI->clearVirtRegs();
 
-  SkippedInstrs.clear();
   StackSlotForVirtReg.clear();
   LiveDbgValueMap.clear();
   return true;
 }
 
 FunctionPass *llvm::createFastRegisterAllocator() {
-  return new RAFast();
+  return new RegAllocFast();
 }
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 020e81eca2dd..186ef577e31d 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -30,12 +31,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -53,6 +54,9 @@
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
@@ -65,10 +69,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -104,10 +105,11 @@ static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
              " interference at a time"),
     cl::init(8));
 
-static cl::opt<bool>
-ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
-                 cl::desc("Exhaustive Search for registers bypassing the depth "
-                          "and interference cutoffs of last chance recoloring"));
+static cl::opt<bool> ExhaustiveSearch(
+    "exhaustive-register-search", cl::NotHidden,
+    cl::desc("Exhaustive Search for registers bypassing the depth "
+             "and interference cutoffs of last chance recoloring"),
+    cl::Hidden);
 
 static cl::opt<bool> EnableLocalReassignment(
     "enable-local-reassign", cl::Hidden,
@@ -129,6 +131,12 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
               cl::desc("Cost for first time use of callee-saved register."),
               cl::init(0), cl::Hidden);
 
+static cl::opt<bool> ConsiderLocalIntervalCost(
+    "condsider-local-interval-cost", cl::Hidden,
+    cl::desc("Consider the cost of local intervals created by a split "
+             "candidate when choosing the best split candidate."),
+    cl::init(false));
+
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
@@ -277,6 +285,57 @@ class RAGreedy : public MachineFunctionPass,
     }
   };
 
+  /// EvictionTrack - Keeps track of past evictions in order to optimize region
+  /// split decision.
+  class EvictionTrack {
+
+  public:
+    using EvictorInfo =
+        std::pair<unsigned /* evictor */, unsigned /* physreg */>;
+    using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>;
+
+  private:
+    /// Each Vreg that has been evicted in the last stage of selectOrSplit will
+    /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
+    EvicteeInfo Evictees;
+
+  public:
+    /// \brief Clear all eviction information.
+    void clear() { Evictees.clear(); }
+
+    /// \brief  Clear eviction information for the given evictee Vreg.
+    /// E.g. when Vreg get's a new allocation, the old eviction info is no
+    /// longer relevant.
+    /// \param Evictee The evictee Vreg for whom we want to clear collected
+    /// eviction info.
+    void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
+
+    /// \brief Track new eviction.
+    /// The Evictor vreg has evicted the Evictee vreg from Physreg.
+    /// \praram PhysReg The phisical register Evictee was evicted from.
+    /// \praram Evictor The evictor Vreg that evicted Evictee.
+    /// \praram Evictee The evictee Vreg.
+    void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
+      Evictees[Evictee].first = Evictor;
+      Evictees[Evictee].second = PhysReg;
+    }
+
+    /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
+    /// \praram Evictee The evictee vreg.
+    /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
+    /// nobody has evicted Evictee from PhysReg.
+    EvictorInfo getEvictor(unsigned Evictee) {
+      if (Evictees.count(Evictee)) {
+        return Evictees[Evictee];
+      }
+
+      return EvictorInfo(0, 0);
+    }
+  };
+
+  // Keeps track of past evictions in order to optimize region split decision.
+  EvictionTrack LastEvicted;
+
   // splitting state.
   std::unique_ptr<SplitAnalysis> SA;
   std::unique_ptr<SplitEditor> SE;
@@ -340,6 +399,10 @@ class RAGreedy : public MachineFunctionPass,
   /// obtained from the TargetSubtargetInfo.
   bool EnableLocalReassign;
 
+  /// Enable or not the the consideration of the cost of local intervals created
+  /// by a split candidate when choosing the best split candidate.
+  bool EnableAdvancedRASplitCost;
+
   /// Set of broken hints that may be reconciled later because of eviction.
   SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
 
@@ -382,13 +445,24 @@ private:
   bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
   void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
   void growRegion(GlobalSplitCandidate &Cand);
-  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
+  bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
+                                  unsigned BBNumber,
+                                  const AllocationOrder &Order);
+  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
+                                     const AllocationOrder &Order,
+                                     bool *CanCauseEvictionChain);
   bool calcCompactRegion(GlobalSplitCandidate&);
   void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
   unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+  bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
+                                   SlotIndex Start, SlotIndex End,
+                                   EvictionCost &MaxCost);
+  unsigned getCheapestEvicteeWeight(const AllocationOrder &Order,
+                                    LiveInterval &VirtReg, SlotIndex Start,
+                                    SlotIndex End, float *BestEvictWeight);
   void evictInterference(LiveInterval&, unsigned,
                          SmallVectorImpl<unsigned>&);
   bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
@@ -405,7 +479,8 @@ private:
   unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
                                     AllocationOrder &Order,
                                     BlockFrequency &BestCost,
-                                    unsigned &NumCands, bool IgnoreCSR);
+                                    unsigned &NumCands, bool IgnoreCSR,
+                                    bool *CanCauseEvictionChain = nullptr);
   /// Perform region splitting.
   unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                          bool HasCompact,
@@ -546,14 +621,17 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
 //===----------------------------------------------------------------------===//
 
 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+  LiveInterval &LI = LIS->getInterval(VirtReg);
   if (VRM->hasPhys(VirtReg)) {
-    LiveInterval &LI = LIS->getInterval(VirtReg);
     Matrix->unassign(LI);
     aboutToRemoveInterval(LI);
     return true;
   }
   // Unassigned virtreg is probably in the priority queue.
   // RegAllocBase will erase it after dequeueing.
+  // Nonetheless, clear the live-range so that the debug
+  // dump will show the right state for that VirtReg.
+  LI.clear();
   return false;
 }
 
@@ -685,7 +763,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   // preferred register.
   if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
     if (Order.isHint(Hint)) {
-      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+      DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
       EvictionCost MaxCost;
       MaxCost.setBrokenHints(1);
       if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -704,7 +782,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   if (!Cost)
     return PhysReg;
 
-  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+  DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost
                << '\n');
   unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
   return CheapReg ? CheapReg : PhysReg;
@@ -734,7 +812,7 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
   }
   if (PhysReg)
     DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
-          << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
+          << printReg(PrevReg, TRI) << " to " << printReg(PhysReg, TRI)
           << '\n');
   return PhysReg;
 }
@@ -856,6 +934,92 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
   return true;
 }
 
+/// \brief Return true if all interferences between VirtReg and PhysReg between
+/// Start and End can be evicted.
+///
+/// \param VirtReg Live range that is about to be assigned.
+/// \param PhysReg Desired register for assignment.
+/// \param Start   Start of range to look for interferences.
+/// \param End     End of range to look for interferences.
+/// \param MaxCost Only look for cheaper candidates and update with new cost
+///                when returning true.
+/// \return True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
+                                           unsigned PhysReg, SlotIndex Start,
+                                           SlotIndex End,
+                                           EvictionCost &MaxCost) {
+  EvictionCost Cost;
+
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+
+    // Check if any interfering live range is heavier than MaxWeight.
+    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+
+      // Check if interference overlast the segment in interest.
+      if (!Intf->overlaps(Start, End))
+        continue;
+
+      // Cannot evict non virtual reg interference.
+      if (!TargetRegisterInfo::isVirtualRegister(Intf->reg))
+        return false;
+      // Never evict spill products. They cannot split or spill.
+      if (getStage(*Intf) == RS_Done)
+        return false;
+
+      // Would this break a satisfied hint?
+      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+      // Update eviction cost.
+      Cost.BrokenHints += BreaksHint;
+      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+      // Abort if this would be too expensive.
+      if (!(Cost < MaxCost))
+        return false;
+    }
+  }
+
+  if (Cost.MaxWeight == 0)
+    return false;
+
+  MaxCost = Cost;
+  return true;
+}
+
+/// \brief Return tthe physical register that will be best
+/// candidate for eviction by a local split interval that will be created
+/// between Start and End.
+///
+/// \param Order            The allocation order
+/// \param VirtReg          Live range that is about to be assigned.
+/// \param Start            Start of range to look for interferences
+/// \param End              End of range to look for interferences
+/// \param BestEvictweight  The eviction cost of that eviction
+/// \return The PhysReg which is the best candidate for eviction and the
+/// eviction cost in BestEvictweight
+unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
+                                            LiveInterval &VirtReg,
+                                            SlotIndex Start, SlotIndex End,
+                                            float *BestEvictweight) {
+  EvictionCost BestEvictCost;
+  BestEvictCost.setMax();
+  BestEvictCost.MaxWeight = VirtReg.weight;
+  unsigned BestEvicteePhys = 0;
+
+  // Go over all physical registers and find the best candidate for eviction
+  for (auto PhysReg : Order.getOrder()) {
+
+    if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
+                                     BestEvictCost))
+      continue;
+
+    // Best so far.
+    BestEvicteePhys = PhysReg;
+  }
+  *BestEvictweight = BestEvictCost.MaxWeight;
+  return BestEvicteePhys;
+}
+
 /// evictInterference - Evict any interferring registers that prevent VirtReg
 /// from being assigned to Physreg. This assumes that canEvictInterference
 /// returned true.
@@ -868,7 +1032,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
   if (!Cascade)
     Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
 
-  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+  DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                << " interference: Cascade " << Cascade << '\n');
 
   // Collect all interfering virtregs first.
@@ -890,6 +1054,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
     // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
     if (!VRM->hasPhys(Intf->reg))
       continue;
+
+    LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg);
+
     Matrix->unassign(*Intf);
     assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
             VirtReg.isSpillable() < Intf->isSpillable()) &&
@@ -957,8 +1124,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
     // The first use of a callee-saved register in a function has cost 1.
     // Don't start using a CSR when the CostPerUseLimit is low.
     if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
-      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
-            << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+      DEBUG(dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
+            << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
             << '\n');
       continue;
     }
@@ -1211,13 +1378,117 @@ BlockFrequency RAGreedy::calcSpillCost() {
   return Cost;
 }
 
+/// \brief Check if splitting Evictee will create a local split interval in
+/// basic block number BBNumber that may cause a bad eviction chain. This is
+/// intended to prevent bad eviction sequences like:
+/// movl	%ebp, 8(%esp)           # 4-byte Spill
+/// movl	%ecx, %ebp
+/// movl	%ebx, %ecx
+/// movl	%edi, %ebx
+/// movl	%edx, %edi
+/// cltd
+/// idivl	%esi
+/// movl	%edi, %edx
+/// movl	%ebx, %edi
+/// movl	%ecx, %ebx
+/// movl	%ebp, %ecx
+/// movl	16(%esp), %ebp          # 4 - byte Reload
+///
+/// Such sequences are created in 2 scenarios:
+///
+/// Scenario #1:
+/// %0 is evicted from physreg0 by %1.
+/// Evictee %0 is intended for region splitting with split candidate
+/// physreg0 (the reg %0 was evicted from).
+/// Region splitting creates a local interval because of interference with the
+/// evictor %1 (normally region spliitting creates 2 interval, the "by reg"
+/// and "by stack" intervals and local interval created when interference
+/// occurs).
+/// One of the split intervals ends up evicting %2 from physreg1.
+/// Evictee %2 is intended for region splitting with split candidate
+/// physreg1.
+/// One of the split intervals ends up evicting %3 from physreg2, etc.
+///
+/// Scenario #2
+/// %0 is evicted from physreg0 by %1.
+/// %2 is evicted from physreg2 by %3 etc.
+/// Evictee %0 is intended for region splitting with split candidate
+/// physreg1.
+/// Region splitting creates a local interval because of interference with the
+/// evictor %1.
+/// One of the split intervals ends up evicting back original evictor %1
+/// from physreg0 (the reg %0 was evicted from).
+/// Another evictee %2 is intended for region splitting with split candidate
+/// physreg1.
+/// One of the split intervals ends up evicting %3 from physreg2, etc.
+///
+/// \param Evictee  The register considered to be split.
+/// \param Cand     The split candidate that determines the physical register
+///                 we are splitting for and the interferences.
+/// \param BBNumber The number of a BB for which the region split process will
+///                 create a local split interval.
+/// \param Order    The phisical registers that may get evicted by a split
+///                 artifact of Evictee.
+/// \return True if splitting Evictee may cause a bad eviction chain, false
+/// otherwise.
+bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
+                                          GlobalSplitCandidate &Cand,
+                                          unsigned BBNumber,
+                                          const AllocationOrder &Order) {
+  EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
+  unsigned Evictor = VregEvictorInfo.first;
+  unsigned PhysReg = VregEvictorInfo.second;
+
+  // No actual evictor.
+  if (!Evictor || !PhysReg)
+    return false;
+
+  float MaxWeight = 0;
+  unsigned FutureEvictedPhysReg =
+      getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
+                               Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
+
+  // The bad eviction chain occurs when either the split candidate the the
+  // evited reg or one of the split artifact will evict the evicting reg.
+  if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
+    return false;
+
+  Cand.Intf.moveToBlock(BBNumber);
+
+  // Check to see if the Evictor contains interference (with Evictee) in the
+  // given BB. If so, this interference caused the eviction of Evictee from
+  // PhysReg. This suggest that we will create a local interval during the
+  // region split to avoid this interference This local interval may cause a bad
+  // eviction chain.
+  if (!LIS->hasInterval(Evictor))
+    return false;
+  LiveInterval &EvictorLI = LIS->getInterval(Evictor);
+  if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
+    return false;
+
+  // Now, check to see if the local interval we will create is going to be
+  // expensive enough to evict somebody If so, this may cause a bad eviction
+  // chain.
+  VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
+  float splitArtifactWeight =
+      VRAI.futureWeight(LIS->getInterval(Evictee),
+                        Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+  if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
+    return false;
+
+  return true;
+}
+
 /// calcGlobalSplitCost - Return the global split cost of following the split
 /// pattern in LiveBundles. This cost should be added to the local cost of the
 /// interference pattern in SplitConstraints.
 ///
-BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
+                                             const AllocationOrder &Order,
+                                             bool *CanCauseEvictionChain) {
   BlockFrequency GlobalCost = 0;
   const BitVector &LiveBundles = Cand.LiveBundles;
+  unsigned VirtRegToSplit = SA->getParent().reg;
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@@ -1226,6 +1497,24 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
     bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
     unsigned Ins = 0;
 
+    Cand.Intf.moveToBlock(BC.Number);
+    // Check wheather a local interval is going to be created during the region
+    // split.
+    if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
+        Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn &&
+        RegOut) {
+
+      if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
+        // This interfernce cause our eviction from this assignment, we might
+        // evict somebody else, add that cost.
+        // See splitCanCauseEvictionChain for detailed description of scenarios.
+        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+
+        *CanCauseEvictionChain = true;
+      }
+    }
+
     if (BI.LiveIn)
       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
     if (BI.LiveOut)
@@ -1246,6 +1535,20 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
       if (Cand.Intf.hasInterference()) {
         GlobalCost += SpillPlacer->getBlockFrequency(Number);
         GlobalCost += SpillPlacer->getBlockFrequency(Number);
+
+        // Check wheather a local interval is going to be created during the
+        // region split.
+        if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
+            splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
+          // This interfernce cause our eviction from this assignment, we might
+          // evict somebody else, add that cost.
+          // See splitCanCauseEvictionChain for detailed description of
+          // scenarios.
+          GlobalCost += SpillPlacer->getBlockFrequency(Number);
+          GlobalCost += SpillPlacer->getBlockFrequency(Number);
+
+          *CanCauseEvictionChain = true;
+        }
       }
       continue;
     }
@@ -1309,7 +1612,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
 
     // Create separate intervals for isolated blocks with multiple uses.
     if (!IntvIn && !IntvOut) {
-      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+      DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
       if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
         SE->splitSingleBlock(BI);
       continue;
@@ -1410,6 +1713,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                   SmallVectorImpl<unsigned> &NewVRegs) {
   unsigned NumCands = 0;
+  BlockFrequency SpillCost = calcSpillCost();
   BlockFrequency BestCost;
 
   // Check if we can split this live range around a compact region.
@@ -1421,14 +1725,24 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   } else {
     // No benefit from the compact region, our fallback will be per-block
     // splitting. Make sure we find a solution that is cheaper than spilling.
-    BestCost = calcSpillCost();
+    BestCost = SpillCost;
     DEBUG(dbgs() << "Cost of isolating all blocks = ";
                  MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
   }
 
+  bool CanCauseEvictionChain = false;
   unsigned BestCand =
       calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
-                               false/*IgnoreCSR*/);
+                               false /*IgnoreCSR*/, &CanCauseEvictionChain);
+
+  // Split candidates with compact regions can cause a bad eviction sequence.
+  // See splitCanCauseEvictionChain for detailed description of scenarios.
+  // To avoid it, we need to comapre the cost with the spill cost and not the
+  // current max frequency.
+  if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
+    CanCauseEvictionChain) {
+    return 0;
+  }
 
   // No solutions found, fall back to single block splitting.
   if (!HasCompact && BestCand == NoCand)
@@ -1440,8 +1754,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
 unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                             AllocationOrder &Order,
                                             BlockFrequency &BestCost,
-                                            unsigned &NumCands,
-                                            bool IgnoreCSR) {
+                                            unsigned &NumCands, bool IgnoreCSR,
+                                            bool *CanCauseEvictionChain) {
   unsigned BestCand = NoCand;
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
@@ -1476,10 +1790,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
     SpillPlacer->prepare(Cand.LiveBundles);
     BlockFrequency Cost;
     if (!addSplitConstraints(Cand.Intf, Cost)) {
-      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+      DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
     }
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
+    DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
                  MBFI->printBlockFreq(dbgs(), Cost));
     if (Cost >= BestCost) {
       DEBUG({
@@ -1487,7 +1801,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
           dbgs() << " worse than no bundles\n";
         else
           dbgs() << " worse than "
-                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
       });
       continue;
     }
@@ -1501,7 +1815,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
       continue;
     }
 
-    Cost += calcGlobalSplitCost(Cand);
+    bool HasEvictionChain = false;
+    Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
     DEBUG({
       dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
                                 << " with bundles";
@@ -1512,9 +1827,24 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
     if (Cost < BestCost) {
       BestCand = NumCands;
       BestCost = Cost;
+      // See splitCanCauseEvictionChain for detailed description of bad
+      // eviction chain scenarios.
+      if (CanCauseEvictionChain)
+        *CanCauseEvictionChain = HasEvictionChain;
     }
     ++NumCands;
   }
+
+  if (CanCauseEvictionChain && BestCand != NoCand) {
+    // See splitCanCauseEvictionChain for detailed description of bad
+    // eviction chain scenarios.
+    DEBUG(dbgs() << "Best split candidate of vreg "
+                 << printReg(VirtReg.reg, TRI) << "  may ");
+    if (!(*CanCauseEvictionChain))
+      DEBUG(dbgs() << "not ");
+    DEBUG(dbgs() << "cause bad eviction chain\n");
+  }
+
   return BestCand;
 }
 
@@ -1535,7 +1865,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
     if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
       UsedCands.push_back(BestCand);
       Cand.IntvIdx = SE->openIntv();
-      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+      DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                    << B << " bundles, intv " << Cand.IntvIdx << ".\n");
       (void)B;
     }
@@ -1884,7 +2214,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
 
-      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+      DEBUG(dbgs() << printReg(PhysReg, TRI) << ' '
                    << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                    << " i=" << MaxGap);
 
@@ -1985,7 +2315,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
       if (IntvMap[i] == 1) {
         setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
-        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
+        DEBUG(dbgs() << printReg(LREdit.get(i)));
       }
     DEBUG(dbgs() << '\n');
   }
@@ -2051,6 +2381,15 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
 //                          Last Chance Recoloring
 //===----------------------------------------------------------------------===//
 
+/// Return true if \p reg has any tied def operand.
+static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
+  for (const MachineOperand &MO : MRI->def_operands(reg))
+    if (MO.isTied())
+      return true;
+
+  return false;
+}
+
 /// mayRecolorAllInterferences - Check if the virtual registers that
 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
 /// recolored to free \p PhysReg.
@@ -2079,10 +2418,13 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       // If Intf is done and sit on the same register class as VirtReg,
       // it would not be recolorable as it is in the same state as VirtReg.
-      if ((getStage(*Intf) == RS_Done &&
-           MRI->getRegClass(Intf->reg) == CurRC) ||
+      // However, if VirtReg has tied defs and Intf doesn't, then
+      // there is still a point in examining if it can be recolorable.
+      if (((getStage(*Intf) == RS_Done &&
+            MRI->getRegClass(Intf->reg) == CurRC) &&
+           !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) ||
           FixedRegisters.count(Intf->reg)) {
-        DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n");
+        DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
         return false;
       }
       RecoloringCandidates.insert(Intf);
@@ -2162,7 +2504,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
     DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
-                 << PrintReg(PhysReg, TRI) << '\n');
+                 << printReg(PhysReg, TRI) << '\n');
     RecoloringCandidates.clear();
     VirtRegToPhysReg.clear();
     CurrentNewVRegs.clear();
@@ -2170,7 +2512,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
     // It is only possible to recolor virtual register interference.
     if (Matrix->checkInterference(VirtReg, PhysReg) >
         LiveRegMatrix::IK_VirtReg) {
-      DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n");
+      DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");
 
       continue;
     }
@@ -2179,7 +2521,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
     // the interferences.
     if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                     FixedRegisters)) {
-      DEBUG(dbgs() << "Some inteferences cannot be recolored.\n");
+      DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
       continue;
     }
 
@@ -2222,7 +2564,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
     }
 
     DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
-                 << PrintReg(PhysReg, TRI) << '\n');
+                 << printReg(PhysReg, TRI) << '\n');
 
     // The recoloring attempt failed, undo the changes.
     FixedRegisters = SaveFixedRegisters;
@@ -2285,7 +2627,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
       continue;
     }
     DEBUG(dbgs() << "Recoloring of " << *LI
-                 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+                 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
 
     Matrix->assign(*LI, PhysReg);
     FixedRegisters.insert(LI->reg);
@@ -2300,7 +2642,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                  SmallVectorImpl<unsigned> &NewVRegs) {
   CutOffInfo = CO_None;
-  LLVMContext &Ctx = MF->getFunction()->getContext();
+  LLVMContext &Ctx = MF->getFunction().getContext();
   SmallVirtRegSet FixedRegisters;
   unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
   if (Reg == ~0U && (CutOffInfo != CO_None)) {
@@ -2452,8 +2794,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
   Visited.insert(Reg);
   RecoloringCandidates.push_back(Reg);
 
-  DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
-               << PrintReg(PhysReg, TRI) << ")\n");
+  DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) << '('
+               << printReg(PhysReg, TRI) << ")\n");
 
   do {
     Reg = RecoloringCandidates.pop_back_val();
@@ -2474,7 +2816,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
                                 Matrix->checkInterference(LI, PhysReg)))
       continue;
 
-    DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
+    DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                  << ") is recolorable.\n");
 
     // Gather the hint info.
@@ -2565,6 +2907,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
   if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+    // If VirtReg got an assignment, the eviction info is no longre relevant.
+    LastEvicted.clearEvicteeInfo(VirtReg.reg);
     // When NewVRegs is not empty, we may have made decisions such as evicting
     // a virtual register, go with the earlier decisions and use the physical
     // register.
@@ -2598,6 +2942,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
       // copy-related live-ranges.
       if (Hint && Hint != PhysReg)
         SetOfBrokenHints.insert(&VirtReg);
+      // If VirtReg eviction someone, the eviction info for it as an evictee is
+      // no longre relevant.
+      LastEvicted.clearEvicteeInfo(VirtReg.reg);
       return PhysReg;
     }
 
@@ -2617,8 +2964,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
     // Try splitting VirtReg or interferences.
     unsigned NewVRegSizeBefore = NewVRegs.size();
     unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
-    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
+    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
+      // If VirtReg got split, the eviction info is no longre relevant.
+      LastEvicted.clearEvicteeInfo(VirtReg.reg);
       return PhysReg;
+    }
   }
 
   // If we couldn't allocate a register from spilling, there is probably some
@@ -2702,17 +3052,20 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
   if (Reloads || FoldedReloads || Spills || FoldedSpills) {
     using namespace ore;
 
-    MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
-                                      L->getStartLoc(), L->getHeader());
-    if (Spills)
-      R << NV("NumSpills", Spills) << " spills ";
-    if (FoldedSpills)
-      R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
-    if (Reloads)
-      R << NV("NumReloads", Reloads) << " reloads ";
-    if (FoldedReloads)
-      R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
-    ORE->emit(R << "generated in loop");
+    ORE->emit([&]() {
+      MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
+                                        L->getStartLoc(), L->getHeader());
+      if (Spills)
+        R << NV("NumSpills", Spills) << " spills ";
+      if (FoldedSpills)
+        R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+      if (Reloads)
+        R << NV("NumReloads", Reloads) << " reloads ";
+      if (FoldedReloads)
+        R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+      R << "generated in loop";
+      return R;
+    });
   }
 }
 
@@ -2729,6 +3082,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
                         MF->getSubtarget().enableRALocalReassignment(
                             MF->getTarget().getOptLevel());
 
+  EnableAdvancedRASplitCost = ConsiderLocalIntervalCost ||
+                              MF->getSubtarget().enableAdvancedRASplitCost();
+
   if (VerifyEnabled)
     MF->verify(this, "Before greedy register allocator");
 
@@ -2760,6 +3116,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
   GlobalCand.resize(32);  // This will grow as needed.
   SetOfBrokenHints.clear();
+  LastEvicted.clear();
 
   allocatePhysRegs();
   tryHintsRecoloring();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 9778103575fa..351e91c932eb 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -43,7 +43,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -59,6 +59,8 @@
 #include "llvm/CodeGen/PBQPRAConstraint.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
@@ -70,8 +72,6 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -668,7 +668,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
 
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   (void)TRI;
-  DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+  DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
                << LRE.getParent().weight << ", New vregs: ");
 
   // Copy any newly inserted live intervals into the list of regs to
@@ -677,7 +677,7 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
        I != E; ++I) {
     const LiveInterval &LI = LIS.getInterval(*I);
     assert(!LI.empty() && "Empty spill range.");
-    DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+    DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
     VRegsToAlloc.insert(LI.reg);
   }
 
@@ -707,7 +707,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
 
     if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
       unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
-      DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
+      DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
             << TRI.getName(PReg) << "\n");
       assert(PReg != 0 && "Invalid preg selected.");
       VRM.assignVirt2Phys(VReg, PReg);
@@ -799,7 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
   findVRegIntervalsToAlloc(MF, LIS);
 
 #ifndef NDEBUG
-  const Function &F = *MF.getFunction();
+  const Function &F = MF.getFunction();
   std::string FullyQualifiedName =
     F.getParent()->getModuleIdentifier() + "." + F.getName().str();
 #endif
@@ -864,7 +864,7 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
     const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
     unsigned VReg = G.getNodeMetadata(NId).getVReg();
     const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
-    OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
+    OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
   });
 }
 
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index 855aa37ff3c3..f49ea25bbf35 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -27,7 +27,7 @@
 #include "llvm/CodeGen/RegisterUsageInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 
 using namespace llvm;
 
@@ -95,7 +95,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
   unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
   RegMask.resize(RegMaskSize, 0xFFFFFFFF);
 
-  const Function *F = MF.getFunction();
+  const Function &F = MF.getFunction();
 
   PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
 
@@ -127,10 +127,12 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
 
   if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
     const uint32_t *CallPreservedMask =
-        TRI->getCallPreservedMask(MF, F->getCallingConv());
-    // Set callee saved register as preserved.
-    for (unsigned i = 0; i < RegMaskSize; ++i)
-      RegMask[i] = RegMask[i] | CallPreservedMask[i];
+        TRI->getCallPreservedMask(MF, F.getCallingConv());
+    if (CallPreservedMask) {
+      // Set callee saved register as preserved.
+      for (unsigned i = 0; i < RegMaskSize; ++i)
+        RegMask[i] = RegMask[i] | CallPreservedMask[i];
+    }
   } else {
     ++NumCSROpt;
     DEBUG(dbgs() << MF.getName()
@@ -139,11 +141,11 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
 
   for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
     if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
-      DEBUG(dbgs() << TRI->getName(PReg) << " ");
+      DEBUG(dbgs() << printReg(PReg, TRI) << " ");
 
   DEBUG(dbgs() << " \n----------------------------------------\n");
 
-  PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask));
+  PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask));
 
   return false;
 }
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index 5cc35bfeca63..5b12d00e126f 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -21,6 +21,7 @@
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -87,14 +88,31 @@ void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+// Assumes call instructions have a single reference to a function.
+static const Function *findCalledFunction(const Module &M, MachineInstr &MI) {
+  for (MachineOperand &MO : MI.operands()) {
+    if (MO.isGlobal())
+      return dyn_cast<Function>(MO.getGlobal());
+
+    if (MO.isSymbol())
+      return M.getFunction(MO.getSymbolName());
+  }
+
+  return nullptr;
+}
+
 bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
-  const Module *M = MF.getFunction()->getParent();
+  const Module *M = MF.getFunction().getParent();
   PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
 
   DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
                << " ++++++++++++++++++++  \n");
   DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
 
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!MFI.hasCalls() && !MFI.hasTailCall())
+    return false;
+
   bool Changed = false;
 
   for (MachineBasicBlock &MBB : MF) {
@@ -113,15 +131,14 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
         Changed = true;
       };
 
-      MachineOperand &Operand = MI.getOperand(0);
-      if (Operand.isGlobal())
-        UpdateRegMask(cast<Function>(Operand.getGlobal()));
-      else if (Operand.isSymbol())
-        UpdateRegMask(M->getFunction(Operand.getSymbolName()));
+      if (const Function *F = findCalledFunction(*M, MI)) {
+        UpdateRegMask(F);
+      } else {
+        DEBUG(dbgs() << "Failed to find call target function\n");
+      }
 
-      DEBUG(dbgs()
-            << "Call Instruction After Register Usage Info Propagation : \n");
-      DEBUG(dbgs() << MI << "\n");
+      DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : "
+            << MI << '\n');
     }
   }
 
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 956dec39fc38..b0eeb81f583e 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -20,13 +20,13 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -153,7 +153,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
   DEBUG({
     dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
     for (unsigned I = 0; I != RCI.NumRegs; ++I)
-      dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+      dbgs() << ' ' << printReg(RCI.Order[I], TRI);
     dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
   });
 
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index a67d07b36474..00a2e93c71ca 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1,4 +1,4 @@
-//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,32 +14,49 @@
 //===----------------------------------------------------------------------===//
 
 #include "RegisterCoalescer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/Value.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "regalloc"
@@ -53,10 +70,9 @@ STATISTIC(NumInflated , "Number of register classes inflated");
 STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
 STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
 
-static cl::opt<bool>
-EnableJoining("join-liveintervals",
-              cl::desc("Coalesce copies (default=true)"),
-              cl::init(true));
+static cl::opt<bool> EnableJoining("join-liveintervals",
+                                   cl::desc("Coalesce copies (default=true)"),
+                                   cl::init(true), cl::Hidden);
 
 static cl::opt<bool> UseTerminalRule("terminal-rule",
                                      cl::desc("Apply the terminal rule"),
@@ -79,11 +95,11 @@ VerifyCoalescing("verify-coalescing",
          cl::Hidden);
 
 namespace {
+
   class RegisterCoalescer : public MachineFunctionPass,
                             private LiveRangeEdit::Delegate {
     MachineFunction* MF;
     MachineRegisterInfo* MRI;
-    const TargetMachine* TM;
     const TargetRegisterInfo* TRI;
     const TargetInstrInfo* TII;
     LiveIntervals *LIS;
@@ -211,9 +227,9 @@ namespace {
     /// flag.
     /// This can happen when undef uses were previously concealed by a copy
     /// which we coalesced. Example:
-    ///    %vreg0:sub0<def,read-undef> = ...
-    ///    %vreg1 = COPY %vreg0       <-- Coalescing COPY reveals undef
-    ///           = use %vreg1:sub1   <-- hidden undef use
+    ///    %0:sub0<def,read-undef> = ...
+    ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
+    ///       = use %1:sub1       <-- hidden undef use
     void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
                       MachineOperand &MO, unsigned SubRegIdx);
 
@@ -248,8 +264,19 @@ namespace {
       }
     }
 
+    /// Wrapper Method to do all the necessary work when an Instruction is
+    /// deleted.
+    /// Optimizations should use this to make sure that deleted instructions
+    /// are always accounted for.
+    void deleteInstr(MachineInstr* MI) {
+      ErasedInstrs.insert(MI);
+      LIS->RemoveMachineInstrFromMaps(*MI);
+      MI->eraseFromParent();
+    }
+
   public:
     static char ID; ///< Class identification, replacement for typeinfo
+
     RegisterCoalescer() : MachineFunctionPass(ID) {
       initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
     }
@@ -264,8 +291,11 @@ namespace {
     /// Implement the dump method.
     void print(raw_ostream &O, const Module* = nullptr) const override;
   };
+
 } // end anonymous namespace
 
+char RegisterCoalescer::ID = 0;
+
 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
 
 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
@@ -277,8 +307,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                     "Simple Register Coalescing", false, false)
 
-char RegisterCoalescer::ID = 0;
-
 static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
                         unsigned &Src, unsigned &Dst,
                         unsigned &SrcSub, unsigned &DstSub) {
@@ -334,7 +362,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
     Flipped = true;
   }
 
-  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
 
   if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
     // Eliminate DstSub on a physreg.
@@ -540,7 +568,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
   // in IntB, we can merge them.
   if (ValS+1 != BS) return false;
 
-  DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
+  DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
 
   SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
   // We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -616,8 +644,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
 /// Copy segements with value number @p SrcValNo from liverange @p Src to live
 /// range @Dst and use value number @p DstValNo there.
 static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
-                                 const LiveRange &Src, const VNInfo *SrcValNo)
-{
+                                 const LiveRange &Src, const VNInfo *SrcValNo) {
   for (const LiveRange::Segment &S : Src.segments) {
     if (S.valno != SrcValNo)
       continue;
@@ -640,7 +667,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   // its other operand is coalesced to the copy dest register, see if we can
   // transform the copy into a noop by commuting the definition. For example,
   //
-  //  A3 = op A2 B0<kill>
+  //  A3 = op A2 killed B0
   //    ...
   //  B1 = A3      <- this copy
   //    ...
@@ -648,7 +675,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   //
   // ==>
   //
-  //  B2 = op B0 A2<kill>
+  //  B2 = op B0 killed A2
   //    ...
   //  B1 = B2      <- now an identity copy
   //    ...
@@ -741,7 +768,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   // ...
   // B = A
   // ...
-  // C = A<kill>
+  // C = killed A
   // ...
   //   = B
 
@@ -797,9 +824,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
       S.MergeValueNumberInto(SubDVNI, SubBValNo);
     }
 
-    ErasedInstrs.insert(UseMI);
-    LIS->RemoveMachineInstrFromMaps(*UseMI);
-    UseMI->eraseFromParent();
+    deleteInstr(UseMI);
   }
 
   // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
@@ -966,8 +991,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
 
   // Now ok to move copy.
   if (CopyLeftBB) {
-    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
-                 << CopyLeftBB->getNumber() << '\t' << CopyMI);
+    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
+                 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
 
     // Insert new copy to CopyLeftBB.
     auto InsPos = CopyLeftBB->getFirstTerminator();
@@ -985,18 +1010,16 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
     // the deleted list.
     ErasedInstrs.erase(NewCopyMI);
   } else {
-    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
-                 << MBB.getNumber() << '\t' << CopyMI);
+    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
+                 << printMBBReference(MBB) << '\t' << CopyMI);
   }
 
   // Remove CopyMI.
   // Note: This is fine to remove the copy before updating the live-ranges.
   // While updating the live-ranges, we only look at slot indices and
   // never go back to the instruction.
-  LIS->RemoveMachineInstrFromMaps(CopyMI);
   // Mark instructions as deleted.
-  ErasedInstrs.insert(&CopyMI);
-  CopyMI.eraseFromParent();
+  deleteInstr(&CopyMI);
 
   // Update the liveness.
   SmallVector<SlotIndex, 8> EndPoints;
@@ -1119,10 +1142,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
   NewMI.setDebugLoc(DL);
 
   // In a situation like the following:
-  //     %vreg0:subreg = instr              ; DefMI, subreg = DstIdx
-  //     %vreg1        = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
-  // instead of widening %vreg1 to the register class of %vreg0 simply do:
-  //     %vreg1 = instr
+  //     %0:subreg = instr              ; DefMI, subreg = DstIdx
+  //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
+  // instead of widening %1 to the register class of %0 simply do:
+  //     %1 = instr
   const TargetRegisterClass *NewRC = CP.getNewRC();
   if (DstIdx != 0) {
     MachineOperand &DefMO = NewMI.getOperand(0);
@@ -1202,12 +1225,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     // This could happen if the rematerialization instruction is rematerializing
     // more than actually is used in the register.
     // An example would be:
-    // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
+    // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
     // ; Copying only part of the register here, but the rest is undef.
-    // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit
+    // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
     // ==>
     // ; Materialize all the constants but only using one
-    // vreg2 = LOAD_CONSTANTS 5, 8
+    // %2 = LOAD_CONSTANTS 5, 8
     //
     // at this point for the part that wasn't defined before we could have
     // subranges missing the definition.
@@ -1230,11 +1253,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
 
     // Make sure that the subrange for resultant undef is removed
     // For example:
-    //   vreg1:sub1<def,read-undef> = LOAD CONSTANT 1
-    //   vreg2<def> = COPY vreg1
+    //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
+    //   %2 = COPY %1
     // ==>
-    //   vreg2:sub1<def, read-undef> = LOAD CONSTANT 1
-    //     ; Correct but need to remove the subrange for vreg2:sub0
+    //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
+    //     ; Correct but need to remove the subrange for %2:sub0
     //     ; as it is now undef
     if (NewIdx != 0 && DstInt.hasSubRanges()) {
       // The affected subregister segments can be removed.
@@ -1268,15 +1291,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     // Otherwise, variables that live through may miss some
     // interferences, thus creating invalid allocation.
     // E.g., i386 code:
-    // vreg1 = somedef ; vreg1 GR8
-    // vreg2 = remat ; vreg2 GR32
-    // CL = COPY vreg2.sub_8bit
-    // = somedef vreg1 ; vreg1 GR8
+    // %1 = somedef ; %1 GR8
+    // %2 = remat ; %2 GR32
+    // CL = COPY %2.sub_8bit
+    // = somedef %1 ; %1 GR8
     // =>
-    // vreg1 = somedef ; vreg1 GR8
-    // ECX<def, dead> = remat ; CL<imp-def>
-    // = somedef vreg1 ; vreg1 GR8
-    // vreg1 will see the inteferences with CL but not with CH since
+    // %1 = somedef ; %1 GR8
+    // dead ECX = remat ; implicit-def CL
+    // = somedef %1 ; %1 GR8
+    // %1 will see the inteferences with CL but not with CH since
     // no live-ranges would have been created for ECX.
     // Fix that!
     SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
@@ -1313,6 +1336,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
       MachineInstr *UseMI = UseMO.getParent();
       if (UseMI->isDebugValue()) {
         UseMO.setReg(DstReg);
+        // Move the debug value directly after the def of the rematerialized
+        // value in DstReg.
+        MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
         DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
       }
     }
@@ -1326,9 +1352,9 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
   // ProcessImpicitDefs may leave some copies of <undef> values, it only removes
   // local variables. When we have a copy like:
   //
-  //   %vreg1 = COPY %vreg2<undef>
+  //   %1 = COPY undef %2
   //
-  // We delete the copy and remove the corresponding value number from %vreg1.
+  // We delete the copy and remove the corresponding value number from %1.
   // Any uses of that value number are marked as <undef>.
 
   // Note that we do not query CoalescerPair here but redo isMoveInstr as the
@@ -1540,7 +1566,6 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
 }
 
 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
-
   Again = false;
   DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
 
@@ -1560,7 +1585,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
       std::swap(SrcRC, DstRC);
     }
     if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
-                            CP.getNewRC())) {
+                             CP.getNewRC(), *LIS)) {
       DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
       return false;
     }
@@ -1578,8 +1603,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
 
   // Eliminate undefs.
   if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
-    LIS->RemoveMachineInstrFromMaps(*CopyMI);
-    CopyMI->eraseFromParent();
+    deleteInstr(CopyMI);
     return false;  // Not coalescable.
   }
 
@@ -1607,15 +1631,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
       }
       DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
     }
-    LIS->RemoveMachineInstrFromMaps(*CopyMI);
-    CopyMI->eraseFromParent();
+    deleteInstr(CopyMI);
     return true;
   }
 
   // Enforce policies.
   if (CP.isPhys()) {
-    DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
-                 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+    DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI)
+                 << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx())
                  << '\n');
     if (!canJoinPhys(CP)) {
       // Before giving up coalescing, if definition of source is defined by
@@ -1637,13 +1660,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
       dbgs() << "\tConsidering merging to "
              << TRI->getRegClassName(CP.getNewRC()) << " with ";
       if (CP.getDstIdx() && CP.getSrcIdx())
-        dbgs() << PrintReg(CP.getDstReg()) << " in "
+        dbgs() << printReg(CP.getDstReg()) << " in "
                << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
-               << PrintReg(CP.getSrcReg()) << " in "
+               << printReg(CP.getSrcReg()) << " in "
                << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
       else
-        dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
-               << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+        dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
+               << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
     });
   }
 
@@ -1668,8 +1691,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
     if (!CP.isPartial() && !CP.isPhys()) {
       if (adjustCopiesBackFrom(CP, CopyMI) ||
           removeCopyByCommutingDef(CP, CopyMI)) {
-        LIS->RemoveMachineInstrFromMaps(*CopyMI);
-        CopyMI->eraseFromParent();
+        deleteInstr(CopyMI);
         DEBUG(dbgs() << "\tTrivial!\n");
         return true;
       }
@@ -1735,11 +1757,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
   TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
 
   DEBUG({
-    dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
-           << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
+    dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
+           << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
     dbgs() << "\tResult = ";
     if (CP.isPhys())
-      dbgs() << PrintReg(CP.getDstReg(), TRI);
+      dbgs() << printReg(CP.getDstReg(), TRI);
     else
       dbgs() << LIS->getInterval(CP.getDstReg());
     dbgs() << '\n';
@@ -1774,7 +1796,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
           return false;
       }
       if (RHS.overlaps(LIS->getRegUnit(*UI))) {
-        DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+        DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n');
         return false;
       }
     }
@@ -1797,20 +1819,20 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   MachineInstr *CopyMI;
   if (CP.isFlipped()) {
     // Physreg is copied into vreg
-    //   %vregY = COPY %X
-    //   ...  //< no other def of %X here
-    //   use %vregY
+    //   %y = COPY %physreg_x
+    //   ...  //< no other def of %x here
+    //   use %y
     // =>
     //   ...
-    //   use %X
+    //   use %x
     CopyMI = MRI->getVRegDef(SrcReg);
   } else {
     // VReg is copied into physreg:
-    //   %vregX = def
-    //   ... //< no other def or use of %Y here
-    //   %Y = COPY %vregX
+    //   %y = def
+    //   ... //< no other def or use of %y here
+    //   %y = COPY %physreg_x
     // =>
-    //   %Y = def
+    //   %y = def
     //   ...
     if (!MRI->hasOneNonDBGUse(SrcReg)) {
       DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
@@ -1829,7 +1851,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
 
     if (!MRI->isConstantPhysReg(DstReg)) {
       // We checked above that there are no interfering defs of the physical
-      // register. However, for this case, where we intent to move up the def of
+      // register. However, for this case, where we intend to move up the def of
       // the physical register, we also need to check for interfering uses.
       SlotIndexes *Indexes = LIS->getSlotIndexes();
       for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
@@ -1844,7 +1866,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
 
     // We're going to remove the copy which defines a physical reserved
     // register, so remove its valno, etc.
-    DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
+    DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI)
           << " at " << CopyRegIdx << "\n");
 
     LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
@@ -1855,8 +1877,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
     }
   }
 
-  LIS->RemoveMachineInstrFromMaps(*CopyMI);
-  CopyMI->eraseFromParent();
+  deleteInstr(CopyMI);
 
   // We don't track kills for reserved registers.
   MRI->clearKillFlags(CP.getSrcReg());
@@ -1906,7 +1927,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
 //
 //      %dst:ssub0<def,read-undef> = FOO
 //      %src = BAR
-//      %dst:ssub1<def> = COPY %src
+//      %dst:ssub1 = COPY %src
 //
 //    The live range of %src overlaps the %dst value defined by FOO, but
 //    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
@@ -1921,14 +1942,15 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
 //    is live, but never read. This can happen because we don't compute
 //    individual live ranges per lane.
 //
-//      %dst<def> = FOO
+//      %dst = FOO
 //      %src = BAR
-//      %dst:ssub1<def> = COPY %src
+//      %dst:ssub1 = COPY %src
 //
 //    This kind of interference is only resolved locally. If the clobbered
 //    lane value escapes the block, the join is aborted.
 
 namespace {
+
 /// Track information about values in a single virtual register about to be
 /// joined. Objects of this class are always created in pairs - one for each
 /// side of the CoalescerPair (or one for each lane of a side of the coalescer
@@ -1936,6 +1958,7 @@ namespace {
 class JoinVals {
   /// Live range we work on.
   LiveRange &LR;
+
   /// (Main) register we work on.
   const unsigned Reg;
 
@@ -1943,6 +1966,7 @@ class JoinVals {
   /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
   /// CP.SrcIdx.
   const unsigned SubIdx;
+
   /// The LaneMask that this liverange will occupy the coalesced register. May
   /// be smaller than the lanemask produced by SubIdx when merging subranges.
   const LaneBitmask LaneMask;
@@ -1950,6 +1974,7 @@ class JoinVals {
   /// This is true when joining sub register ranges, false when joining main
   /// ranges.
   const bool SubRangeJoin;
+
   /// Whether the current LiveInterval tracks subregister liveness.
   const bool TrackSubRegLiveness;
 
@@ -1997,7 +2022,7 @@ class JoinVals {
   /// joined register, so they can be compared directly between SrcReg and
   /// DstReg.
   struct Val {
-    ConflictResolution Resolution;
+    ConflictResolution Resolution = CR_Keep;
 
     /// Lanes written by this def, 0 for unanalyzed values.
     LaneBitmask WriteLanes;
@@ -2007,10 +2032,10 @@ class JoinVals {
     LaneBitmask ValidLanes;
 
     /// Value in LI being redefined by this def.
-    VNInfo *RedefVNI;
+    VNInfo *RedefVNI = nullptr;
 
     /// Value in the other live range that overlaps this def, if any.
-    VNInfo *OtherVNI;
+    VNInfo *OtherVNI = nullptr;
 
     /// Is this value an IMPLICIT_DEF that can be erased?
     ///
@@ -2023,18 +2048,16 @@ class JoinVals {
     /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
     /// longer live ranges. Such IMPLICIT_DEF values should be treated like
     /// normal values.
-    bool ErasableImplicitDef;
+    bool ErasableImplicitDef = false;
 
     /// True when the live range of this value will be pruned because of an
     /// overlapping CR_Replace value in the other live range.
-    bool Pruned;
+    bool Pruned = false;
 
     /// True once Pruned above has been computed.
-    bool PrunedComputed;
+    bool PrunedComputed = false;
 
-    Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(),
-            RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
-            Pruned(false), PrunedComputed(false) {}
+    Val() = default;
 
     bool isAnalyzed() const { return WriteLanes.any(); }
   };
@@ -2081,8 +2104,9 @@ class JoinVals {
   /// entry to TaintedVals.
   ///
   /// Returns false if the tainted lanes extend beyond the basic block.
-  bool taintExtent(unsigned, LaneBitmask, JoinVals&,
-                   SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&);
+  bool
+  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
 
   /// Return true if MI uses any of the given Lanes from Reg.
   /// This does not include partial redefinitions of Reg.
@@ -2104,8 +2128,7 @@ public:
     : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
       SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
       NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
-      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums())
-  {}
+      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
 
   /// Analyze defs in LR and compute a value mapping in NewVNInfo.
   /// Returns false if any conflicts were impossible to resolve.
@@ -2149,6 +2172,7 @@ public:
   /// Get the value assignments suitable for passing to LiveInterval::join.
   const int *getAssignments() const { return Assignments.data(); }
 };
+
 } // end anonymous namespace
 
 LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
@@ -2239,7 +2263,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   const MachineInstr *DefMI = nullptr;
   if (VNI->isPHIDef()) {
     // Conservatively assume that all lanes in a PHI are valid.
-    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1)
+    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
                                      : TRI->getSubRegIndexLaneMask(SubIdx);
     V.ValidLanes = V.WriteLanes = Lanes;
   } else {
@@ -2247,7 +2271,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
     assert(DefMI != nullptr);
     if (SubRangeJoin) {
       // We don't care about the lanes when joining subregister ranges.
-      V.WriteLanes = V.ValidLanes = LaneBitmask(1);
+      V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
       if (DefMI->isImplicitDef()) {
         V.ValidLanes = LaneBitmask::getNone();
         V.ErasableImplicitDef = true;
@@ -2263,7 +2287,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
       //
       // This adds ssub1 to the set of valid lanes in %src:
       //
-      //   %src:ssub1<def> = FOO
+      //   %src:ssub1 = FOO
       //
       // This leaves only ssub1 valid, making any other lanes undef:
       //
@@ -2352,7 +2376,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   if (OtherV.ErasableImplicitDef && DefMI &&
       DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
     DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
-                 << " extends into BB#" << DefMI->getParent()->getNumber()
+                 << " extends into " << printMBBReference(*DefMI->getParent())
                  << ", keeping it.\n");
     OtherV.ErasableImplicitDef = false;
   }
@@ -2401,9 +2425,9 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   //
   //   1 %dst:ssub0 = FOO                <-- OtherVNI
   //   2 %src = BAR                      <-- VNI
-  //   3 %dst:ssub1 = COPY %src<kill>    <-- Eliminate this copy.
-  //   4 BAZ %dst<kill>
-  //   5 QUUX %src<kill>
+  //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
+  //   4 BAZ killed %dst
+  //   5 QUUX killed %src
   //
   // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
   // handles this complex value mapping.
@@ -2413,7 +2437,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   // If the other live range is killed by DefMI and the live ranges are still
   // overlapping, it must be because we're looking at an early clobber def:
   //
-  //   %dst<def,early-clobber> = ASM %src<kill>
+  //   %dst<def,early-clobber> = ASM killed %src
   //
   // In this case, it is illegal to merge the two live ranges since the early
   // clobber def would clobber %src before it was read.
@@ -2463,9 +2487,9 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
     assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
     assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
     Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
-    DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
+    DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
                  << LR.getValNumInfo(ValNo)->def << " into "
-                 << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
+                 << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
                  << V.OtherVNI->def << " --> @"
                  << NewVNInfo[Assignments[ValNo]]->def << '\n');
     break;
@@ -2493,7 +2517,7 @@ bool JoinVals::mapValues(JoinVals &Other) {
   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
     computeAssignment(i, Other);
     if (Vals[i].Resolution == CR_Impossible) {
-      DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i
+      DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
                    << '@' << LR.getValNumInfo(i)->def << '\n');
       return false;
     }
@@ -2503,7 +2527,7 @@ bool JoinVals::mapValues(JoinVals &Other) {
 
 bool JoinVals::
 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
-            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) {
+            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
   VNInfo *VNI = LR.getValNumInfo(ValNo);
   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
@@ -2516,11 +2540,11 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
     // lanes escape the block.
     SlotIndex End = OtherI->end;
     if (End >= MBBEnd) {
-      DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':'
+      DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
                    << OtherI->valno->id << '@' << OtherI->start << '\n');
       return false;
     }
-    DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':'
+    DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
                  << OtherI->valno->id << '@' << OtherI->start
                  << " to " << End << '\n');
     // A dead def is not a problem.
@@ -2560,10 +2584,10 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
 bool JoinVals::resolveConflicts(JoinVals &Other) {
   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
     Val &V = Vals[i];
-    assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
+    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
     if (V.Resolution != CR_Unresolved)
       continue;
-    DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i
+    DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i
                  << '@' << LR.getValNumInfo(i)->def << '\n');
     if (SubRangeJoin)
       return false;
@@ -2598,7 +2622,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
       Indexes->getInstructionFromIndex(TaintExtent.front().first);
     assert(LastMI && "Range must end at a proper instruction");
     unsigned TaintNum = 0;
-    for (;;) {
+    while (true) {
       assert(MI != MBB->end() && "Bad LastMI");
       if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
         DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
@@ -2658,13 +2682,13 @@ void JoinVals::pruneValues(JoinVals &Other,
       if (!Def.isBlock()) {
         if (changeInstrs) {
           // Remove <def,read-undef> flags. This def is now a partial redef.
-          // Also remove <def,dead> flags since the joined live range will
+          // Also remove dead flags since the joined live range will
           // continue past this instruction.
           for (MachineOperand &MO :
                Indexes->getInstructionFromIndex(Def)->operands()) {
             if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
-              if (MO.getSubReg() != 0)
-                MO.setIsUndef(EraseImpDef);
+              if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
+                MO.setIsUndef(false);
               MO.setIsDead(false);
             }
           }
@@ -2674,7 +2698,7 @@ void JoinVals::pruneValues(JoinVals &Other,
         if (!EraseImpDef)
           EndPoints.push_back(Def);
       }
-      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
+      DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
                    << ": " << Other.LR << '\n');
       break;
     }
@@ -2686,7 +2710,7 @@ void JoinVals::pruneValues(JoinVals &Other,
         // computeAssignment(), the value that was originally copied could have
         // been replaced.
         LIS->pruneValue(LR, Def, &EndPoints);
-        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
+        DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
                      << Def << ": " << LR << '\n');
       }
       break;
@@ -2994,7 +3018,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
         R.LaneMask = Mask;
       }
     }
-    DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg())
+    DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg())
                  << ' ' << LHS << '\n');
 
     // Determine lanemasks of RHS in the coalesced register and merge subranges.
@@ -3068,6 +3092,7 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
 }
 
 namespace {
+
 /// Information concerning MBB coalescing priority.
 struct MBBPriorityInfo {
   MachineBasicBlock *MBB;
@@ -3077,7 +3102,8 @@ struct MBBPriorityInfo {
   MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
     : MBB(mbb), Depth(depth), IsSplit(issplit) {}
 };
-}
+
+} // end anonymous namespace
 
 /// C-style comparator that sorts first based on the loop depth of the basic
 /// block (the unsigned), and then on the MBB number.
@@ -3194,7 +3220,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
       continue;
     // Check that OtherReg interfere with DstReg.
     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
-      DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+      DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n');
       return true;
     }
   }
@@ -3281,7 +3307,7 @@ void RegisterCoalescer::joinAllIntervals() {
   array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
 
   // Coalesce intervals in MBB priority order.
-  unsigned CurrDepth = UINT_MAX;
+  unsigned CurrDepth = std::numeric_limits<unsigned>::max();
   for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
     // Try coalescing the collected local copies for deeper loops.
     if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
@@ -3308,7 +3334,6 @@ void RegisterCoalescer::releaseMemory() {
 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   MF = &fn;
   MRI = &fn.getRegInfo();
-  TM = &fn.getTarget();
   const TargetSubtargetInfo &STI = fn.getSubtarget();
   TRI = STI.getRegisterInfo();
   TII = STI.getInstrInfo();
@@ -3349,7 +3374,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
     if (MRI->reg_nodbg_empty(Reg))
       continue;
     if (MRI->recomputeRegClass(Reg)) {
-      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+      DEBUG(dbgs() << printReg(Reg) << " inflated to "
                    << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
       ++NumInflated;
 
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 04067a1427af..1a46f6d053e6 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
+//===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,10 +17,9 @@
 
 namespace llvm {
 
-  class MachineInstr;
-  class TargetRegisterInfo;
-  class TargetRegisterClass;
-  class TargetInstrInfo;
+class MachineInstr;
+class TargetRegisterClass;
+class TargetRegisterInfo;
 
   /// A helper class for register coalescers. When deciding if
   /// two registers can be coalesced, CoalescerPair can determine if a copy
@@ -30,43 +29,40 @@ namespace llvm {
 
     /// The register that will be left after coalescing. It can be a
     /// virtual or physical register.
-    unsigned DstReg;
+    unsigned DstReg = 0;
 
     /// The virtual register that will be coalesced into dstReg.
-    unsigned SrcReg;
+    unsigned SrcReg = 0;
 
     /// The sub-register index of the old DstReg in the new coalesced register.
-    unsigned DstIdx;
+    unsigned DstIdx = 0;
 
     /// The sub-register index of the old SrcReg in the new coalesced register.
-    unsigned SrcIdx;
+    unsigned SrcIdx = 0;
 
     /// True when the original copy was a partial subregister copy.
-    bool Partial;
+    bool Partial = false;
 
     /// True when both regs are virtual and newRC is constrained.
-    bool CrossClass;
+    bool CrossClass = false;
 
     /// True when DstReg and SrcReg are reversed from the original
     /// copy instruction.
-    bool Flipped;
+    bool Flipped = false;
 
     /// The register class of the coalesced register, or NULL if DstReg
     /// is a physreg. This register class may be a super-register of both
     /// SrcReg and DstReg.
-    const TargetRegisterClass *NewRC;
+    const TargetRegisterClass *NewRC = nullptr;
 
   public:
-    CoalescerPair(const TargetRegisterInfo &tri)
-      : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
-        Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
+    CoalescerPair(const TargetRegisterInfo &tri) : TRI(tri) {}
 
     /// Create a CoalescerPair representing a virtreg-to-physreg copy.
     /// No need to call setRegisters().
     CoalescerPair(unsigned VirtReg, unsigned PhysReg,
                   const TargetRegisterInfo &tri)
-      : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
-        Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
+      : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
 
     /// Set registers to match the copy instruction MI. Return
     /// false if MI is not a coalescable copy instruction.
@@ -111,6 +107,7 @@ namespace llvm {
     /// Return the register class of the coalesced register.
     const TargetRegisterClass *getNewRC() const { return NewRC; }
   };
-} // End llvm namespace
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 88e0a3b58940..9ac810c7c723 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -17,7 +17,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -26,14 +26,14 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -97,7 +97,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dumpRegSetPressure(MaxSetPressure, TRI);
   dbgs() << "Live In: ";
   for (const RegisterMaskPair &P : LiveInRegs) {
-    dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+    dbgs() << printVRegOrUnit(P.RegUnit, TRI);
     if (!P.LaneMask.all())
       dbgs() << ':' << PrintLaneMask(P.LaneMask);
     dbgs() << ' ';
@@ -105,7 +105,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << '\n';
   dbgs() << "Live Out: ";
   for (const RegisterMaskPair &P : LiveOutRegs) {
-    dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+    dbgs() << printVRegOrUnit(P.RegUnit, TRI);
     if (!P.LaneMask.all())
       dbgs() << ':' << PrintLaneMask(P.LaneMask);
     dbgs() << ' ';
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index fc5105aadbff..97967124add6 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -16,30 +16,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/RegisterScavenging.h"
-
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
 #include <limits>
 #include <string>
+#include <utility>
 
 using namespace llvm;
 
@@ -210,7 +213,7 @@ void RegScavenger::forward() {
         continue;
       if (!isRegUsed(Reg)) {
         // Check if it's partial live: e.g.
-        // D0 = insert_subreg D0<undef>, S0
+        // D0 = insert_subreg undef D0, S0
         // ... D0
         // The problem is the insert_subreg could be eliminated. The use of
         // D0 is using a partially undef value. This is not *incorrect* since
@@ -285,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
 unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
   for (unsigned Reg : *RC) {
     if (!isRegUsed(Reg)) {
-      DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) <<
-            "\n");
+      DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
+                   << "\n");
       return Reg;
     }
   }
@@ -460,7 +463,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
                     MachineBasicBlock::iterator &UseMI) {
   // Find an available scavenging slot with size and alignment matching
   // the requirements of the class RC.
-  const MachineFunction &MF = *Before->getParent()->getParent();
+  const MachineFunction &MF = *Before->getMF();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned NeedSize = TRI->getSpillSize(RC);
   unsigned NeedAlign = TRI->getSpillAlignment(RC);
@@ -533,7 +536,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
                                         MachineBasicBlock::iterator I,
                                         int SPAdj) {
   MachineInstr &MI = *I;
-  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineFunction &MF = *MI.getMF();
   // Consider all allocatable registers in the register class initially
   BitVector Candidates = TRI->getAllocatableSet(MF, RC);
 
@@ -558,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
 
   // If we found an unused register there is no reason to spill it.
   if (!isRegUsed(SReg)) {
-    DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+    DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
     return SReg;
   }
 
   ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
   Scavenged.Restore = &*std::prev(UseMI);
 
-  DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
-        "\n");
+  DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI)
+               << "\n");
 
   return SReg;
 }
@@ -595,10 +598,10 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
     ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
     Scavenged.Restore = &*std::prev(SpillBefore);
     LiveUnits.removeReg(Reg);
-    DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI)
-          << " until " << *SpillBefore);
+    DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+                 << " until " << *SpillBefore);
   } else {
-    DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n');
+    DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n');
   }
   return Reg;
 }
@@ -769,13 +772,16 @@ void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
 }
 
 namespace {
+
 /// This class runs register scavenging independ of the PrologEpilogInserter.
 /// This is used in for testing.
 class ScavengerTest : public MachineFunctionPass {
 public:
   static char ID;
+
   ScavengerTest() : MachineFunctionPass(ID) {}
-  bool runOnMachineFunction(MachineFunction &MF) {
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
     const TargetSubtargetInfo &STI = MF.getSubtarget();
     const TargetFrameLowering &TFL = *STI.getFrameLowering();
 
@@ -792,9 +798,10 @@ public:
     return true;
   }
 };
-char ScavengerTest::ID;
 
 } // end anonymous namespace
 
+char ScavengerTest::ID;
+
 INITIALIZE_PASS(ScavengerTest, "scavenger-test",
                 "Scavenge virtual registers inside basic blocks", false, false)
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 30757f070cad..4e42deb406e1 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -12,17 +12,17 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -97,7 +97,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
 
     for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
       if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
-        OS << TRI->getName(PReg) << " ";
+        OS << printReg(PReg, TRI) << " ";
     }
     OS << "\n";
   }
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index bd5ecbd28f29..1e1f36a35ecc 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -10,32 +10,32 @@
 /// Rename independent subregisters looks for virtual registers with
 /// independently used subregisters and renames them to new virtual registers.
 /// Example: In the following:
-///   %vreg0:sub0<read-undef> = ...
-///   %vreg0:sub1 = ...
-///   use %vreg0:sub0
-///   %vreg0:sub0 = ...
-///   use %vreg0:sub0
-///   use %vreg0:sub1
+///   %0:sub0<read-undef> = ...
+///   %0:sub1 = ...
+///   use %0:sub0
+///   %0:sub0 = ...
+///   use %0:sub0
+///   use %0:sub1
 /// sub0 and sub1 are never used together, and we have two independent sub0
 /// definitions. This pass will rename to:
-///   %vreg0:sub0<read-undef> = ...
-///   %vreg1:sub1<read-undef> = ...
-///   use %vreg1:sub1
-///   %vreg2:sub1<read-undef> = ...
-///   use %vreg2:sub1
-///   use %vreg0:sub0
+///   %0:sub0<read-undef> = ...
+///   %1:sub1<read-undef> = ...
+///   use %1:sub1
+///   %2:sub1<read-undef> = ...
+///   use %2:sub1
+///   use %0:sub0
 //
 //===----------------------------------------------------------------------===//
 
 #include "LiveRangeUtils.h"
 #include "PHIEliminationUtils.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 
 using namespace llvm;
 
@@ -134,15 +134,15 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
   const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
   SmallVector<LiveInterval*, 4> Intervals;
   Intervals.push_back(&LI);
-  DEBUG(dbgs() << PrintReg(Reg) << ": Found " << Classes.getNumClasses()
+  DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
         << " equivalence classes.\n");
-  DEBUG(dbgs() << PrintReg(Reg) << ": Splitting into newly created:");
+  DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
   for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
        ++I) {
     unsigned NewVReg = MRI->createVirtualRegister(RegClass);
     LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
     Intervals.push_back(&NewLI);
-    DEBUG(dbgs() << ' ' << PrintReg(NewVReg));
+    DEBUG(dbgs() << ' ' << printReg(NewVReg));
   }
   DEBUG(dbgs() << '\n');
 
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index 01b3db43b283..f1885aa74285 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -51,7 +51,7 @@ namespace {
         ++NumFunctionsReset;
         MF.reset();
         if (EmitFallbackDiag) {
-          const Function &F = *MF.getFunction();
+          const Function &F = MF.getFunction();
           DiagnosticInfoISelFallback DiagFallback(F);
           F.getContext().diagnose(DiagFallback);
         }
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 8584a9b7c897..51233be521be 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -1,4 +1,4 @@
-//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===//
+//===- SafeStack.cpp - Safe Stack Insertion -------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,37 +17,56 @@
 
 #include "SafeStackColoring.h"
 #include "SafeStackLayout.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_os_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
 
 using namespace llvm;
 using namespace llvm::safestack;
@@ -255,16 +274,16 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
       assert(V == UI.get());
 
       switch (I->getOpcode()) {
-      case Instruction::Load: {
+      case Instruction::Load:
         if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr,
                           AllocaSize))
           return false;
         break;
-      }
+
       case Instruction::VAArg:
         // "va-arg" from a pointer is safe.
         break;
-      case Instruction::Store: {
+      case Instruction::Store:
         if (V == I->getOperand(0)) {
           // Stored the pointer - conservatively assume it may be unsafe.
           DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
@@ -276,11 +295,10 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
                           AllocaPtr, AllocaSize))
           return false;
         break;
-      }
-      case Instruction::Ret: {
+
+      case Instruction::Ret:
         // Information leak.
         return false;
-      }
 
       case Instruction::Call:
       case Instruction::Invoke: {
@@ -372,7 +390,7 @@ void SafeStack::findInsts(Function &F,
       StackRestorePoints.push_back(LP);
     } else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
       if (II->getIntrinsicID() == Intrinsic::gcroot)
-        llvm::report_fatal_error(
+        report_fatal_error(
             "gcroot intrinsic not compatible with safestack attribute");
     }
   }
@@ -540,7 +558,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
 
     // Replace alloc with the new location.
     replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
-                      /*Deref=*/false, -Offset);
+                      DIExpression::NoDeref, -Offset, DIExpression::NoDeref);
     Arg->replaceAllUsesWith(NewArg);
     IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
     IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
@@ -555,7 +573,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
     if (Size == 0)
       Size = 1; // Don't create zero-sized stack objects.
 
-    replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset);
+    replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref,
+                               -Offset, DIExpression::NoDeref);
     replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
 
     // Replace uses of the alloca with the new location.
@@ -645,7 +664,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
     if (AI->hasName() && isa<Instruction>(NewAI))
       NewAI->takeName(AI);
 
-    replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false);
+    replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0,
+                               DIExpression::NoDeref);
     AI->replaceAllUsesWith(NewAI);
     AI->eraseFromParent();
   }
@@ -764,11 +784,12 @@ bool SafeStack::run() {
 }
 
 class SafeStackLegacyPass : public FunctionPass {
-  const TargetMachine *TM;
+  const TargetMachine *TM = nullptr;
 
 public:
   static char ID; // Pass identification, replacement for typeid..
-  SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) {
+
+  SafeStackLegacyPass() : FunctionPass(ID) {
     initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
@@ -817,9 +838,10 @@ public:
   }
 };
 
-} // anonymous namespace
+} // end anonymous namespace
 
 char SafeStackLegacyPass::ID = 0;
+
 INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
                       "Safe Stack instrumentation pass", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 21f2fa497233..072e6e090e1e 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -1,4 +1,4 @@
-//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===//
+//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,12 +8,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "SafeStackColoring.h"
-
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/User.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <tuple>
+#include <utility>
 
 using namespace llvm;
 using namespace llvm::safestack;
diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h
index 08b179ccb7f1..902e63ebeb7e 100644
--- a/lib/CodeGen/SafeStackColoring.h
+++ b/lib/CodeGen/SafeStackColoring.h
@@ -1,4 +1,4 @@
-//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===//
+//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,16 +10,23 @@
 #ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
 #define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
 
 namespace llvm {
-class AllocaInst;
+
+class BasicBlock;
+class Function;
+class Instruction;
 
 namespace safestack {
+
 /// Compute live ranges of allocas.
 /// Live ranges are represented as sets of "interesting" instructions, which are
 /// defined as instructions that may start or end an alloca's lifetime. These
@@ -35,10 +42,13 @@ class StackColoring {
   struct BlockLifetimeInfo {
     /// Which slots BEGINs in each basic block.
     BitVector Begin;
+
     /// Which slots ENDs in each basic block.
     BitVector End;
+
     /// Which slots are marked as LIVE_IN, coming into each basic block.
     BitVector LiveIn;
+
     /// Which slots are marked as LIVE_OUT, coming out of each basic block.
     BitVector LiveOut;
   };
@@ -48,11 +58,14 @@ public:
   /// live.
   struct LiveRange {
     BitVector bv;
+
     void SetMaximum(int size) { bv.resize(size); }
     void AddRange(unsigned start, unsigned end) { bv.set(start, end); }
+
     bool Overlaps(const LiveRange &Other) const {
       return bv.anyCommon(Other.bv);
     }
+
     void Join(const LiveRange &Other) { bv |= Other.bv; }
   };
 
@@ -60,13 +73,15 @@ private:
   Function &F;
 
   /// Maps active slots (per bit) for each basic block.
-  typedef DenseMap<BasicBlock *, BlockLifetimeInfo> LivenessMap;
+  using LivenessMap = DenseMap<BasicBlock *, BlockLifetimeInfo>;
   LivenessMap BlockLiveness;
 
   /// Number of interesting instructions.
-  int NumInst;
+  int NumInst = -1;
+
   /// Numeric ids for interesting instructions.
   DenseMap<Instruction *, unsigned> InstructionNumbering;
+
   /// A range [Start, End) of instruction ids for each basic block.
   /// Instructions inside each BB have monotonic and consecutive ids.
   DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
@@ -74,6 +89,7 @@ private:
   ArrayRef<AllocaInst *> Allocas;
   unsigned NumAllocas;
   DenseMap<AllocaInst *, unsigned> AllocaNumbering;
+
   /// LiveRange for allocas.
   SmallVector<LiveRange, 8> LiveRanges;
 
@@ -101,7 +117,7 @@ private:
 
 public:
   StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas)
-      : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {}
+      : F(F), Allocas(Allocas), NumAllocas(Allocas.size()) {}
 
   void run();
   void removeAllMarkers();
@@ -143,7 +159,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return OS << R.bv;
 }
 
-} // namespace safestack
-} // namespace llvm
+} // end namespace safestack
+
+} // end namespace llvm
 
 #endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp
index 7d4dbd13abf4..b1759359e46f 100644
--- a/lib/CodeGen/SafeStackLayout.cpp
+++ b/lib/CodeGen/SafeStackLayout.cpp
@@ -1,4 +1,4 @@
-//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===//
+//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,9 +8,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "SafeStackLayout.h"
-
-#include "llvm/IR/Instructions.h"
+#include "SafeStackColoring.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
 
 using namespace llvm;
 using namespace llvm::safestack;
diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h
index 313ed21c8869..7c1292f251f7 100644
--- a/lib/CodeGen/SafeStackLayout.h
+++ b/lib/CodeGen/SafeStackLayout.h
@@ -1,4 +1,4 @@
-//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===//
+//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,8 +11,14 @@
 #define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
 
 #include "SafeStackColoring.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
+
+class raw_ostream;
+class Value;
+
 namespace safestack {
 
 /// Compute the layout of an unsafe stack frame.
@@ -23,10 +29,12 @@ class StackLayout {
     unsigned Start;
     unsigned End;
     StackColoring::LiveRange Range;
+
     StackRegion(unsigned Start, unsigned End,
                 const StackColoring::LiveRange &Range)
         : Start(Start), End(End), Range(Range) {}
   };
+
   /// The list of current stack regions, sorted by StackRegion::Start.
   SmallVector<StackRegion, 16> Regions;
 
@@ -35,6 +43,7 @@ class StackLayout {
     unsigned Size, Alignment;
     StackColoring::LiveRange Range;
   };
+
   SmallVector<StackObject, 8> StackObjects;
 
   DenseMap<const Value *, unsigned> ObjectOffsets;
@@ -43,6 +52,7 @@ class StackLayout {
 
 public:
   StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+
   /// Add an object to the stack frame. Value pointer is opaque and used as a
   /// handle to retrieve the object's offset in the frame later.
   void addObject(const Value *V, unsigned Size, unsigned Alignment,
@@ -59,10 +69,12 @@ public:
 
   /// Returns the alignment of the frame.
   unsigned getFrameAlignment() { return MaxAlignment; }
+
   void print(raw_ostream &OS);
 };
 
-} // namespace safestack
-} // namespace llvm
+} // end namespace safestack
+
+} // end namespace llvm
 
 #endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 07b43a82ca99..cef413f9d410 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -1,5 +1,5 @@
-//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem      ===//
-//===                                instrinsics                           ===//
+//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
+//                                    instrinsics
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,9 +14,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
 
 using namespace llvm;
 
@@ -25,13 +42,15 @@ using namespace llvm;
 namespace {
 
 class ScalarizeMaskedMemIntrin : public FunctionPass {
-  const TargetTransformInfo *TTI;
+  const TargetTransformInfo *TTI = nullptr;
 
 public:
   static char ID; // Pass identification, replacement for typeid
-  explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) {
+
+  explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
     initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
   }
+
   bool runOnFunction(Function &F) override;
 
   StringRef getPassName() const override {
@@ -46,9 +65,11 @@ private:
   bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
   bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
 };
-} // namespace
+
+} // end anonymous namespace
 
 char ScalarizeMaskedMemIntrin::ID = 0;
+
 INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
                 "Scalarize unsupported masked memory intrinsics", false, false)
 
@@ -156,7 +177,6 @@ static void scalarizeMaskedLoad(CallInst *CI) {
   Value *PrevPhi = UndefVal;
 
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
     // Fill the "else" block, created in the previous iteration
     //
     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
@@ -288,7 +308,6 @@ static void scalarizeMaskedStore(CallInst *CI) {
   }
 
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
     // Fill the "else" block, created in the previous iteration
     //
     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
@@ -408,7 +427,6 @@ static void scalarizeMaskedGather(CallInst *CI) {
   Value *PrevPhi = UndefVal;
 
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
     // Fill the "else" block, created in the previous iteration
     //
     //  %Mask1 = extractelement <16 x i1> %Mask, i32 1
@@ -610,13 +628,12 @@ bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
 
 bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
                                                 bool &ModifiedDT) {
-
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II) {
     switch (II->getIntrinsicID()) {
     default:
       break;
-    case Intrinsic::masked_load: {
+    case Intrinsic::masked_load:
       // Scalarize unsupported vector masked load
       if (!TTI->isLegalMaskedLoad(CI->getType())) {
         scalarizeMaskedLoad(CI);
@@ -624,24 +641,21 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
         return true;
       }
       return false;
-    }
-    case Intrinsic::masked_store: {
+    case Intrinsic::masked_store:
       if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
         scalarizeMaskedStore(CI);
         ModifiedDT = true;
         return true;
       }
       return false;
-    }
-    case Intrinsic::masked_gather: {
+    case Intrinsic::masked_gather:
       if (!TTI->isLegalMaskedGather(CI->getType())) {
         scalarizeMaskedGather(CI);
         ModifiedDT = true;
         return true;
       }
       return false;
-    }
-    case Intrinsic::masked_scatter: {
+    case Intrinsic::masked_scatter:
       if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
         scalarizeMaskedScatter(CI);
         ModifiedDT = true;
@@ -649,7 +663,6 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
       }
       return false;
     }
-    }
   }
 
   return false;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 5e95f760aaa2..0635e8f41ee7 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -19,13 +19,13 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
@@ -80,7 +80,7 @@ raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
   case Data:
     OS << " Latency=" << getLatency();
     if (TRI && isAssignedRegDep())
-      OS << " Reg=" << PrintReg(getReg(), TRI);
+      OS << " Reg=" << printReg(getReg(), TRI);
     break;
   case Anti:
   case Output:
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 99baa07390eb..9249fa84b38b 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,7 +21,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -36,6 +36,8 @@
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
@@ -52,8 +54,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
@@ -114,16 +114,18 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
     : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
       RemoveKillFlags(RemoveKillFlags),
       UnknownValue(UndefValue::get(
-                             Type::getVoidTy(mf.getFunction()->getContext()))) {
+                             Type::getVoidTy(mf.getFunction().getContext()))) {
   DbgValues.clear();
 
   const TargetSubtargetInfo &ST = mf.getSubtarget();
   SchedModel.init(ST.getSchedModel(), &ST, TII);
 }
 
-/// If this machine instr has memory reference information and it can be tracked
-/// to a normal reference to a known object, return the Value for that object.
-static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+/// If this machine instr has memory reference information and it can be
+/// tracked to a normal reference to a known object, return the Value
+/// for that object. This function returns false the memory location is
+/// unknown or may alias anything.
+static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                          const MachineFrameInfo &MFI,
                                          UnderlyingObjectsVector &Objects,
                                          const DataLayout &DL) {
@@ -151,7 +153,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
         Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
       } else if (const Value *V = MMO->getValue()) {
         SmallVector<Value *, 4> Objs;
-        getUnderlyingObjectsForCodeGen(V, Objs, DL);
+        if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
+          return false;
 
         for (Value *V : Objs) {
           assert(isIdentifiedObject(V));
@@ -163,8 +166,12 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
     return true;
   };
 
-  if (!allMMOsOkay())
+  if (!allMMOsOkay()) {
     Objects.clear();
+    return false;
+  }
+
+  return true;
 }
 
 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
@@ -769,7 +776,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       if (PDiffs != nullptr)
         PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
 
-      RPTracker->recedeSkipDebugValues();
+      if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
+        RPTracker->recedeSkipDebugValues();
       assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
       RPTracker->recede(RegOpers);
     }
@@ -860,13 +868,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 
     // Find the underlying objects for MI. The Objs vector is either
     // empty, or filled with the Values of memory locations which this
-    // SU depends on. An empty vector means the memory location is
-    // unknown, and may alias anything.
+    // SU depends on.
     UnderlyingObjectsVector Objs;
-    getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());
+    bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
+                                                  MF.getDataLayout());
 
     if (MI.mayStore()) {
-      if (Objs.empty()) {
+      if (!ObjsFound) {
         // An unknown store depends on all stores and loads.
         addChainDependencies(SU, Stores);
         addChainDependencies(SU, NonAliasStores);
@@ -901,7 +909,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
         addChainDependencies(SU, Stores, UnknownValue);
       }
     } else { // SU is a load.
-      if (Objs.empty()) {
+      if (!ObjsFound) {
         // An unknown load depends on all stores.
         addChainDependencies(SU, Stores);
         addChainDependencies(SU, NonAliasStores);
@@ -1036,7 +1044,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
 }
 
 void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
-  DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n');
+  DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
 
   LiveRegs.init(*TRI);
   LiveRegs.addLiveOuts(MBB);
@@ -1348,7 +1356,7 @@ static bool hasDataSucc(const SUnit *SU) {
 /// search from this root.
 void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
   if (!IsBottomUp)
-    llvm_unreachable("Top-down ILP metric is unimplemnted");
+    llvm_unreachable("Top-down ILP metric is unimplemented");
 
   SchedDFSImpl Impl(*this);
   for (const SUnit &SU : SUnits) {
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index bb6a45996f63..37c4a470bd0a 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -14,14 +14,12 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <fstream>
 using namespace llvm;
 
 namespace llvm {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b3d83d5313af..b789e2d9c52c 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -15,12 +15,12 @@
 
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include <cassert>
 
 using namespace llvm;
@@ -32,6 +32,7 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
     const char *ParentDebugType)
     : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
       DAG(SchedDAG) {
+  (void)DebugType;
   // Determine the maximum depth of any itinerary. This determines the depth of
   // the scoreboard. We always make the scoreboard at least 1 cycle deep to
   // avoid dealing with the boundary condition.
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index ae9c5adb0397..fd1e5e2cfc56 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -24,7 +24,7 @@ add_llvm_library(LLVMSelectionDAG
   SelectionDAGTargetInfo.cpp
   StatepointLowering.cpp
   TargetLowering.cpp
-  
+
   DEPENDS
   intrinsics_gen
   )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 432c86dd6f1e..f97732c1c49d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,4 +1,4 @@
-//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,32 +16,64 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "dagcombine"
@@ -53,43 +85,41 @@ STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
 STATISTIC(SlicedLoads, "Number of load sliced");
 
-namespace {
-  static cl::opt<bool>
-    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
-               cl::desc("Enable DAG combiner's use of IR alias analysis"));
+static cl::opt<bool>
+CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+                 cl::desc("Enable DAG combiner's use of IR alias analysis"));
 
-  static cl::opt<bool>
-    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
-               cl::desc("Enable DAG combiner's use of TBAA"));
+static cl::opt<bool>
+UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+        cl::desc("Enable DAG combiner's use of TBAA"));
 
 #ifndef NDEBUG
-  static cl::opt<std::string>
-    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
-               cl::desc("Only use DAG-combiner alias analysis in this"
-                        " function"));
+static cl::opt<std::string>
+CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+                   cl::desc("Only use DAG-combiner alias analysis in this"
+                            " function"));
 #endif
 
-  /// Hidden option to stress test load slicing, i.e., when this option
-  /// is enabled, load slicing bypasses most of its profitability guards.
-  static cl::opt<bool>
-  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
-                    cl::desc("Bypass the profitability model of load "
-                             "slicing"),
-                    cl::init(false));
+/// Hidden option to stress test load slicing, i.e., when this option
+/// is enabled, load slicing bypasses most of its profitability guards.
+static cl::opt<bool>
+StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+                  cl::desc("Bypass the profitability model of load slicing"),
+                  cl::init(false));
 
-  static cl::opt<bool>
-    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
-                      cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<bool>
+  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+                    cl::desc("DAG combiner may split indexing from loads"));
 
-//------------------------------ DAGCombiner ---------------------------------//
+namespace {
 
   class DAGCombiner {
     SelectionDAG &DAG;
     const TargetLowering &TLI;
     CombineLevel Level;
     CodeGenOpt::Level OptLevel;
-    bool LegalOperations;
-    bool LegalTypes;
+    bool LegalOperations = false;
+    bool LegalTypes = false;
     bool ForCodeSize;
 
     /// \brief Worklist of all of the nodes that need to be simplified.
@@ -128,6 +158,19 @@ namespace {
     SDValue visit(SDNode *N);
 
   public:
+    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+          OptLevel(OL), AA(AA) {
+      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+
+      MaximumLegalStoreInBits = 0;
+      for (MVT VT : MVT::all_valuetypes())
+        if (EVT(VT).isSimple() && VT != MVT::Other &&
+            TLI.isTypeLegal(EVT(VT)) &&
+            VT.getSizeInBits() >= MaximumLegalStoreInBits)
+          MaximumLegalStoreInBits = VT.getSizeInBits();
+    }
+
     /// Add to the worklist making sure its instance is at the back (next to be
     /// processed.)
     void AddToWorklist(SDNode *N) {
@@ -285,7 +328,7 @@ namespace {
     SDValue visitSIGN_EXTEND(SDNode *N);
     SDValue visitZERO_EXTEND(SDNode *N);
     SDValue visitANY_EXTEND(SDNode *N);
-    SDValue visitAssertZext(SDNode *N);
+    SDValue visitAssertExt(SDNode *N);
     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -348,6 +391,7 @@ namespace {
     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 
     SDValue foldSelectOfConstants(SDNode *N);
+    SDValue foldVSelectOfConstants(SDNode *N);
     SDValue foldBinOpIntoSelect(SDNode *BO);
     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
@@ -371,6 +415,7 @@ namespace {
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue CombineExtLoad(SDNode *N);
     SDValue combineRepeatedFPDivisors(SDNode *N);
+    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
     SDValue BuildSDIVPow2(SDNode *N);
@@ -400,14 +445,11 @@ namespace {
     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
     SDValue reduceBuildVecToShuffle(SDNode *N);
-    SDValue reduceBuildVecToTrunc(SDNode *N);
     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                   ArrayRef<int> VectorMask, SDValue VecIn1,
                                   SDValue VecIn2, unsigned LeftIdx);
     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
 
-    SDValue GetDemandedBits(SDValue V, const APInt &Mask);
-
     /// Walk up chain skipping non-aliasing memory nodes,
     /// looking for aliasing nodes and adding them to the Aliases vector.
     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
@@ -434,12 +476,14 @@ namespace {
     /// Holds a pointer to an LSBaseSDNode as well as information on where it
     /// is located in a sequence of memory operations connected by a chain.
     struct MemOpLink {
-      MemOpLink(LSBaseSDNode *N, int64_t Offset)
-          : MemNode(N), OffsetFromBase(Offset) {}
       // Ptr to the mem node.
       LSBaseSDNode *MemNode;
+
       // Offset from the base ptr.
       int64_t OffsetFromBase;
+
+      MemOpLink(LSBaseSDNode *N, int64_t Offset)
+          : MemNode(N), OffsetFromBase(Offset) {}
     };
 
     /// This is a helper function for visitMUL to check the profitability
@@ -450,38 +494,49 @@ namespace {
                                      SDValue &AddNode,
                                      SDValue &ConstNode);
 
-
     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
-    /// the type of the loaded value to be extended.  LoadedVT returns the type
-    /// of the original loaded value.  NarrowLoad returns whether the load would
-    /// need to be narrowed in order to match.
+    /// the type of the loaded value to be extended.
     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
-                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
-                          bool &NarrowLoad);
+                          EVT LoadResultTy, EVT &ExtVT);
+
+    /// Helper function to calculate whether the given Load can have its
+    /// width reduced to ExtVT.
+    bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
+                           EVT &ExtVT, unsigned ShAmt = 0);
+
+    /// Used by BackwardsPropagateMask to find suitable loads.
+    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+                           SmallPtrSetImpl<SDNode*> &NodeWithConsts,
+                           ConstantSDNode *Mask, SDNode *&UncombinedNode);
+    /// Attempt to propagate a given AND node back to load leaves so that they
+    /// can be combined into narrow loads.
+    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 
     /// Helper function for MergeConsecutiveStores which merges the
     /// component store chains.
     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumStores);
 
-    /// This is a helper function for MergeConsecutiveStores. When the source
-    /// elements of the consecutive stores are all constants or all extracted
-    /// vector elements, try to merge them into one larger store.
-    /// \return True if a merged store was created.
+    /// This is a helper function for MergeConsecutiveStores. When the
+    /// source elements of the consecutive stores are all constants or
+    /// all extracted vector elements, try to merge them into one
+    /// larger store introducing bitcasts if necessary.  \return True
+    /// if a merged store was created.
     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                          EVT MemVT, unsigned NumStores,
                                          bool IsConstantSrc, bool UseVector,
                                          bool UseTrunc);
 
-    /// This is a helper function for MergeConsecutiveStores.
-    /// Stores that may be merged are placed in StoreNodes.
+    /// This is a helper function for MergeConsecutiveStores. Stores
+    /// that potentially may be merged with St are placed in
+    /// StoreNodes.
     void getStoreMergeCandidates(StoreSDNode *St,
                                  SmallVectorImpl<MemOpLink> &StoreNodes);
 
     /// Helper function for MergeConsecutiveStores. Checks if
-    /// Candidate stores have indirect dependency through their
-    /// operands. \return True if safe to merge
+    /// candidate stores have indirect dependency through their
+    /// operands. \return True if safe to merge.
     bool checkMergeStoreCandidatesForDependencies(
         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
 
@@ -500,19 +555,6 @@ namespace {
     SDValue distributeTruncateThroughAnd(SDNode *N);
 
   public:
-    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
-        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
-          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
-      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
-
-      MaximumLegalStoreInBits = 0;
-      for (MVT VT : MVT::all_valuetypes())
-        if (EVT(VT).isSimple() && VT != MVT::Other &&
-            TLI.isTypeLegal(EVT(VT)) &&
-            VT.getSizeInBits() >= MaximumLegalStoreInBits)
-          MaximumLegalStoreInBits = VT.getSizeInBits();
-    }
-
     /// Runs the dag combiner on all nodes in the work list
     void Run(CombineLevel AtLevel);
 
@@ -541,14 +583,12 @@ namespace {
       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
     }
   };
-}
-
 
-namespace {
 /// This class is a DAGUpdateListener that removes any deleted
 /// nodes from the worklist.
 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
   DAGCombiner &DC;
+
 public:
   explicit WorklistRemover(DAGCombiner &dc)
     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
@@ -557,7 +597,8 @@ public:
     DC.removeFromWorklist(N);
   }
 };
-}
+
+} // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
 //  TargetLowering::DAGCombinerInfo implementation
@@ -577,7 +618,6 @@ CombineTo(SDNode *N, SDValue Res, bool AddTo) {
   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 }
 
-
 SDValue TargetLowering::DAGCombinerInfo::
 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
@@ -873,6 +913,56 @@ static bool isAnyConstantBuildVector(const SDNode *N) {
          ISD::isBuildVectorOfConstantFPSDNodes(N);
 }
 
+// Attempt to match a unary predicate against a scalar/splat constant or
+// every element of a constant BUILD_VECTOR.
+static bool matchUnaryPredicate(SDValue Op,
+                                std::function<bool(ConstantSDNode *)> Match) {
+  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+    return Match(Cst);
+
+  if (ISD::BUILD_VECTOR != Op.getOpcode())
+    return false;
+
+  EVT SVT = Op.getValueType().getScalarType();
+  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+      return false;
+  }
+  return true;
+}
+
+// Attempt to match a binary predicate against a pair of scalar/splat constants
+// or every element of a pair of constant BUILD_VECTORs.
+static bool matchBinaryPredicate(
+    SDValue LHS, SDValue RHS,
+    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+  if (LHS.getValueType() != RHS.getValueType())
+    return false;
+
+  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+      return Match(LHSCst, RHSCst);
+
+  if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+      ISD::BUILD_VECTOR != RHS.getOpcode())
+    return false;
+
+  EVT SVT = LHS.getValueType().getScalarType();
+  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+    if (!LHSCst || !RHSCst)
+      return false;
+    if (LHSCst->getValueType(0) != SVT ||
+        LHSCst->getValueType(0) != RHSCst->getValueType(0))
+      return false;
+    if (!Match(LHSCst, RHSCst))
+      return false;
+  }
+  return true;
+}
+
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                     SDValue N1) {
   EVT VT = N0.getValueType();
@@ -1123,10 +1213,10 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
     Replace0 &= !N0->hasOneUse();
     Replace1 &= (N0 != N1) && !N1->hasOneUse();
 
-    // Combine Op here so it is presreved past replacements.
+    // Combine Op here so it is preserved past replacements.
     CombineTo(Op.getNode(), RV);
 
-    // If operands have a use ordering, make sur we deal with
+    // If operands have a use ordering, make sure we deal with
     // predecessor first.
     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
       std::swap(N0, N1);
@@ -1473,7 +1563,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
-  case ISD::AssertZext:         return visitAssertZext(N);
+  case ISD::AssertSext:
+  case ISD::AssertZext:         return visitAssertExt(N);
   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1572,15 +1663,15 @@ SDValue DAGCombiner::combine(SDNode *N) {
     }
   }
 
-  // If N is a commutative binary node, try commuting it to enable more
-  // sdisel CSE.
+  // If N is a commutative binary node, try eliminate it if the commuted
+  // version is already present in the DAG.
   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
       N->getNumValues() == 1) {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
 
     // Constant operands are canonicalized to RHS.
-    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
       SDValue Ops[] = {N1, N0};
       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                             N->getFlags());
@@ -1632,7 +1723,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
 
     // Check each of the operands.
     for (const SDValue &Op : TF->op_values()) {
-
       switch (Op.getOpcode()) {
       case ISD::EntryToken:
         // Entry tokens don't need to be added to the list. They are
@@ -1907,6 +1997,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
       }
     }
+
+    // Undo the add -> or combine to merge constant offsets from a frame index.
+    if (N0.getOpcode() == ISD::OR &&
+        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
+        isa<ConstantSDNode>(N0.getOperand(1)) &&
+        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
+      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
+    }
   }
 
   if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -2064,7 +2163,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
   }
 
   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
-  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
+  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+      N1.getResNo() == 0)
     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                        N0, N1.getOperand(0), N1.getOperand(2));
 
@@ -2537,6 +2637,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
 
     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
+    assert((!N0IsConst ||
+            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
+           "Splat APInt should be element width");
+    assert((!N1IsConst ||
+            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
+           "Splat APInt should be element width");
   } else {
     N0IsConst = isa<ConstantSDNode>(N0);
     if (N0IsConst) {
@@ -2562,12 +2668,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   // fold (mul x, 0) -> 0
   if (N1IsConst && ConstValue1.isNullValue())
     return N1;
-  // We require a splat of the entire scalar bit width for non-contiguous
-  // bit patterns.
-  bool IsFullSplat =
-    ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
   // fold (mul x, 1) -> x
-  if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
+  if (N1IsConst && ConstValue1.isOneValue())
     return N0;
 
   if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -2580,16 +2682,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
                        DAG.getConstant(0, DL, VT), N0);
   }
   // fold (mul x, (1 << c)) -> x << c
-  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
-      IsFullSplat) {
+  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+      DAG.isKnownToBeAPowerOfTwo(N1) &&
+      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
     SDLoc DL(N);
-    return DAG.getNode(ISD::SHL, DL, VT, N0,
-                       DAG.getConstant(ConstValue1.logBase2(), DL,
-                                       getShiftAmountTy(N0.getValueType())));
+    SDValue LogBase2 = BuildLogBase2(N1, DL);
+    AddToWorklist(LogBase2.getNode());
+
+    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+    AddToWorklist(Trunc.getNode());
+    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
   }
   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
-  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
-      IsFullSplat) {
+  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
     unsigned Log2Val = (-ConstValue1).logBase2();
     SDLoc DL(N);
     // FIXME: If the input is something that is easily negated (e.g. a
@@ -2835,7 +2941,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   // If integer divide is expensive and we satisfy the requirements, emit an
   // alternate sequence.  Targets may check function attributes for size/speed
   // trade-offs.
-  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildSDIV(N))
       return Op;
@@ -2906,7 +3012,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   }
 
   // fold (udiv x, c) -> alternate
-  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
     if (SDValue Op = BuildUDIV(N))
       return Op;
@@ -2965,7 +3071,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
     }
   }
 
-  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
 
   // If X/C can be simplified by the division-by-constant logic, lower
   // X%C to the equivalent of X-X/C*C.
@@ -3003,19 +3109,26 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  if (VT.isVector()) {
+    // fold (mulhs x, 0) -> 0
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+  }
+
   // fold (mulhs x, 0) -> 0
   if (isNullConstant(N1))
     return N1;
   // fold (mulhs x, 1) -> (sra x, size(x)-1)
-  if (isOneConstant(N1)) {
-    SDLoc DL(N);
+  if (isOneConstant(N1))
     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                        getShiftAmountTy(N0.getValueType())));
-  }
+
   // fold (mulhs x, undef) -> 0
   if (N0.isUndef() || N1.isUndef())
-    return DAG.getConstant(0, SDLoc(N), VT);
+    return DAG.getConstant(0, DL, VT);
 
   // If the type twice as wide is legal, transform the mulhs to a wider multiply
   // plus a shift.
@@ -3043,6 +3156,14 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  if (VT.isVector()) {
+    // fold (mulhu x, 0) -> 0
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+  }
+
   // fold (mulhu x, 0) -> 0
   if (isNullConstant(N1))
     return N1;
@@ -3216,7 +3337,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     if (SDValue FoldedVOp = SimplifyVBinOp(N))
       return FoldedVOp;
 
-  // fold (add c1, c2) -> c1+c2
+  // fold operation with constant operands.
   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   if (N0C && N1C)
@@ -3599,22 +3720,20 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
 }
 
 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
-                                   EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
-                                   bool &NarrowLoad) {
-  uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
-
-  if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
+                                   EVT LoadResultTy, EVT &ExtVT) {
+  if (!AndC->getAPIntValue().isMask())
     return false;
 
+  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+
   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-  LoadedVT = LoadN->getMemoryVT();
+  EVT LoadedVT = LoadN->getMemoryVT();
 
   if (ExtVT == LoadedVT &&
       (!LegalOperations ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
     // ZEXTLOAD will match without needing to change the size of the value being
     // loaded.
-    NarrowLoad = false;
     return true;
   }
 
@@ -3634,10 +3753,185 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
     return false;
 
-  NarrowLoad = true;
   return true;
 }
 
+bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
+                                    EVT &ExtVT, unsigned ShAmt) {
+  // Don't transform one with multiple uses, this would require adding a new
+  // load.
+  if (!SDValue(LoadN, 0).hasOneUse())
+    return false;
+
+  if (LegalOperations &&
+      !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
+    return false;
+
+  // Do not generate loads of non-round integer types since these can
+  // be expensive (and would be wrong if the type is not byte sized).
+  if (!ExtVT.isRound())
+    return false;
+
+  // Don't change the width of a volatile load.
+  if (LoadN->isVolatile())
+    return false;
+
+  // Verify that we are actually reducing a load width here.
+  if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
+    return false;
+
+  // For the transform to be legal, the load must produce only two values
+  // (the value loaded and the chain).  Don't transform a pre-increment
+  // load, for example, which produces an extra value.  Otherwise the
+  // transformation is not equivalent, and the downstream logic to replace
+  // uses gets things wrong.
+  if (LoadN->getNumValues() > 2)
+    return false;
+
+  // If the load that we're shrinking is an extload and we're not just
+  // discarding the extension we can't simply shrink the load. Bail.
+  // TODO: It would be possible to merge the extensions in some cases.
+  if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
+      LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
+    return false;
+
+  if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
+    return false;
+
+  // It's not possible to generate a constant of extended or untyped type.
+  EVT PtrType = LoadN->getOperand(1).getValueType();
+  if (PtrType == MVT::Untyped || PtrType.isExtended())
+    return false;
+
+  return true;
+}
+
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
+                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+                                    ConstantSDNode *Mask,
+                                    SDNode *&NodeToMask) {
+  // Recursively search for the operands, looking for loads which can be
+  // narrowed.
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    if (Op.getValueType().isVector())
+      return false;
+
+    // Some constants may need fixing up later if they are too large.
+    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
+          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
+        NodesWithConsts.insert(N);
+      continue;
+    }
+
+    if (!Op.hasOneUse())
+      return false;
+
+    switch(Op.getOpcode()) {
+    case ISD::LOAD: {
+      auto *Load = cast<LoadSDNode>(Op);
+      EVT ExtVT;
+      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
+          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
+        // Only add this load if we can make it more narrow.
+        if (ExtVT.bitsLT(Load->getMemoryVT()))
+          Loads.insert(Load);
+        continue;
+      }
+      return false;
+    }
+    case ISD::ZERO_EXTEND:
+    case ISD::ANY_EXTEND:
+    case ISD::AssertZext: {
+      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+      EVT VT = Op.getOpcode() == ISD::AssertZext ?
+        cast<VTSDNode>(Op.getOperand(1))->getVT() :
+        Op.getOperand(0).getValueType();
+
+      // We can accept extending nodes if the mask is wider or an equal
+      // width to the original type.
+      if (ExtVT.bitsGE(VT))
+        continue;
+      break;
+    }
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::AND:
+      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
+                             NodeToMask))
+        return false;
+      continue;
+    }
+
+    // Allow one node which will masked along with any loads found.
+    if (NodeToMask)
+      return false;
+    NodeToMask = Op.getNode();
+  }
+  return true;
+}
+
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!Mask)
+    return false;
+
+  if (!Mask->getAPIntValue().isMask())
+    return false;
+
+  // No need to do anything if the and directly uses a load.
+  if (isa<LoadSDNode>(N->getOperand(0)))
+    return false;
+
+  SmallPtrSet<LoadSDNode*, 8> Loads;
+  SmallPtrSet<SDNode*, 2> NodesWithConsts;
+  SDNode *FixupNode = nullptr;
+  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
+    if (Loads.size() == 0)
+      return false;
+
+    SDValue MaskOp = N->getOperand(1);
+
+    // If it exists, fixup the single node we allow in the tree that needs
+    // masking.
+    if (FixupNode) {
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+                                FixupNode->getValueType(0),
+                                SDValue(FixupNode, 0), MaskOp);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
+      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
+                             MaskOp);
+    }
+
+    // Narrow any constants that need it.
+    for (auto *LogicN : NodesWithConsts) {
+      auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
+                                SDValue(C, 0), MaskOp);
+      DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+    }
+
+    // Create narrow loads.
+    for (auto *Load : Loads) {
+      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
+                                SDValue(Load, 0), MaskOp);
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+      DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
+      SDValue NewLoad = ReduceLoadWidth(And.getNode());
+      assert(NewLoad &&
+             "Shouldn't be masking the load if it can't be narrowed");
+      CombineTo(Load, NewLoad, NewLoad.getValue(1));
+    }
+    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+    return true;
+  }
+  return false;
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -3829,55 +4123,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
-    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
-    LoadSDNode *LN0 = HasAnyExt
-      ? cast<LoadSDNode>(N0.getOperand(0))
-      : cast<LoadSDNode>(N0);
-    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
-      auto NarrowLoad = false;
-      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-      EVT ExtVT, LoadedVT;
-      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
-                           NarrowLoad)) {
-        if (!NarrowLoad) {
-          SDValue NewLoad =
-            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
-                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
-                           LN0->getMemOperand());
-          AddToWorklist(N);
-          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
-          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
-        } else {
-          EVT PtrType = LN0->getOperand(1).getValueType();
-
-          unsigned Alignment = LN0->getAlignment();
-          SDValue NewPtr = LN0->getBasePtr();
-
-          // For big endian targets, we need to add an offset to the pointer
-          // to load the correct bytes.  For little endian systems, we merely
-          // need to read fewer bytes from the same pointer.
-          if (DAG.getDataLayout().isBigEndian()) {
-            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
-            unsigned EVTStoreBytes = ExtVT.getStoreSize();
-            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-            SDLoc DL(LN0);
-            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
-                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
-            Alignment = MinAlign(Alignment, PtrOff);
-          }
+    if (SDValue Res = ReduceLoadWidth(N)) {
+      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
+        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
 
-          AddToWorklist(NewPtr.getNode());
+      AddToWorklist(N);
+      CombineTo(LN0, Res, Res.getValue(1));
+      return SDValue(N, 0);
+    }
+  }
 
-          SDValue Load = DAG.getExtLoad(
-              ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
-              LN0->getPointerInfo(), ExtVT, Alignment,
-              LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
-          AddToWorklist(N);
-          CombineTo(LN0, Load, Load.getValue(1));
-          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
-        }
-      }
+  if (Level >= AfterLegalizeTypes) {
+    // Attempt to propagate the AND back up to the leaves which, if they're
+    // loads, can be combined to narrow loads and the AND node can be removed.
+    // Perform after legalization so that extend nodes will already be
+    // combined into the loads.
+    if (BackwardsPropagateMask(N, DAG)) {
+      return SDValue(N, 0);
     }
   }
 
@@ -3974,7 +4236,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
     return SDValue();
 
-  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
   bool LookPassAnd0 = false;
   bool LookPassAnd1 = false;
   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
@@ -4593,20 +4855,6 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   return nullptr;
 }
 
-// if Left + Right == Sum (constant or constant splat vector)
-static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum,
-                             SelectionDAG &DAG, const SDLoc &DL) {
-  EVT ShiftVT = Left.getValueType();
-  if (ShiftVT != Right.getValueType()) return false;
-
-  SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT,
-                         Left.getNode(), Right.getNode());
-  if (!ShiftSum) return false;
-
-  ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum);
-  return CSum && CSum->getZExtValue() == Sum;
-}
-
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions, generate
 // a rot[lr].
@@ -4620,6 +4868,16 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
   if (!HasROTL && !HasROTR) return nullptr;
 
+  // Check for truncated rotate.
+  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
+      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
+    assert(LHS.getValueType() == RHS.getValueType());
+    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
+                         SDValue(Rot, 0)).getNode();
+    }
+  }
+
   // Match "(X shl/srl V1) & V2" where V2 may not be present.
   SDValue LHSShift;   // The shift.
   SDValue LHSMask;    // AND value if any.
@@ -4652,7 +4910,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
 
   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
-  if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) {
+  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
+                                        ConstantSDNode *RHS) {
+    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
+  };
+  if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
 
@@ -4712,20 +4974,22 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
 }
 
 namespace {
+
 /// Represents known origin of an individual byte in load combine pattern. The
 /// value of the byte is either constant zero or comes from memory.
 struct ByteProvider {
   // For constant zero providers Load is set to nullptr. For memory providers
   // Load represents the node which loads the byte from memory.
   // ByteOffset is the offset of the byte in the value produced by the load.
-  LoadSDNode *Load;
-  unsigned ByteOffset;
+  LoadSDNode *Load = nullptr;
+  unsigned ByteOffset = 0;
 
-  ByteProvider() : Load(nullptr), ByteOffset(0) {}
+  ByteProvider() = default;
 
   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
     return ByteProvider(Load, ByteOffset);
   }
+
   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
 
   bool isConstantZero() const { return !Load; }
@@ -4740,6 +5004,8 @@ private:
       : Load(Load), ByteOffset(ByteOffset) {}
 };
 
+} // end anonymous namespace
+
 /// Recursively traverses the expression calculating the origin of the requested
 /// byte of the given value. Returns None if the provider can't be calculated.
 ///
@@ -4751,9 +5017,9 @@ private:
 /// Because the parts of the expression are not allowed to have more than one
 /// use this function iterates over trees, not DAGs. So it never visits the same
 /// node more than once.
-const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
-                                                   unsigned Depth,
-                                                   bool Root = false) {
+static const Optional<ByteProvider>
+calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
+                      bool Root = false) {
   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
   if (Depth == 10)
     return None;
@@ -4837,7 +5103,6 @@ const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
 
   return None;
 }
-} // namespace
 
 /// Match a pattern where a wide type scalar value is loaded by several narrow
 /// loads and combined by shifts and ors. Fold it into a single load or a load
@@ -4950,7 +5215,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
 
     Loads.insert(L);
   }
-  assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
          "memory, so there must be at least one load which produces the value");
   assert(Base && "Base address of the accessed memory location must be set");
   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
@@ -5373,7 +5638,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   if (isNullConstantOrNullSplatConstant(N0))
     return N0;
   // fold (shl x, c >= size(x)) -> undef
-  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+    return Val->getAPIntValue().uge(OpSizeInBits);
+  };
+  if (matchUnaryPredicate(N1, MatchShiftTooBig))
     return DAG.getUNDEF(VT);
   // fold (shl x, 0) -> x
   if (N1C && N1C->isNullValue())
@@ -5400,20 +5669,29 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     return SDValue(N, 0);
 
   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SHL) {
-    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
-      SDLoc DL(N);
-      APInt c1 = N0C1->getAPIntValue();
-      APInt c2 = N1C->getAPIntValue();
+  if (N0.getOpcode() == ISD::SHL) {
+    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+                                          ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return (c1 + c2).uge(OpSizeInBits);
+    };
+    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+      return DAG.getConstant(0, SDLoc(N), VT);
 
-      APInt Sum = c1 + c2;
-      if (Sum.uge(OpSizeInBits))
-        return DAG.getConstant(0, DL, VT);
-
-      return DAG.getNode(
-          ISD::SHL, DL, VT, N0.getOperand(0),
-          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+                                       ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return (c1 + c2).ult(OpSizeInBits);
+    };
+    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+      SDLoc DL(N);
+      EVT ShiftVT = N1.getValueType();
+      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
     }
   }
 
@@ -5527,16 +5805,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   }
 
   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
   // Variant of version done on multiply, except mul by a power of 2 is turned
   // into a shift.
-  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+      N0.getNode()->hasOneUse() &&
       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
     AddToWorklist(Shl0.getNode());
     AddToWorklist(Shl1.getNode());
-    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
   }
 
   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
@@ -5579,7 +5859,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (N0C && N1C && !N1C->isOpaque())
     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
   // fold (sra x, c >= size(x)) -> undef
-  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+    return Val->getAPIntValue().uge(OpSizeInBits);
+  };
+  if (matchUnaryPredicate(N1, MatchShiftTooBig))
     return DAG.getUNDEF(VT);
   // fold (sra x, 0) -> x
   if (N1C && N1C->isNullValue())
@@ -5603,20 +5887,31 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   }
 
   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SRA) {
-    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
-      SDLoc DL(N);
-      APInt c1 = N0C1->getAPIntValue();
-      APInt c2 = N1C->getAPIntValue();
-      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
-
-      APInt Sum = c1 + c2;
-      if (Sum.uge(OpSizeInBits))
-        Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
+  if (N0.getOpcode() == ISD::SRA) {
+    SDLoc DL(N);
+    EVT ShiftVT = N1.getValueType();
 
-      return DAG.getNode(
-          ISD::SRA, DL, VT, N0.getOperand(0),
-          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+                                          ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return (c1 + c2).uge(OpSizeInBits);
+    };
+    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+                         DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
+
+    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+                                       ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return (c1 + c2).ult(OpSizeInBits);
+    };
+    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
     }
   }
 
@@ -5647,7 +5942,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
           TLI.isTruncateFree(VT, TruncVT)) {
-
         SDLoc DL(N);
         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
             getShiftAmountTy(N0.getOperand(0).getValueType()));
@@ -5697,7 +5991,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
-
   // If the sign bit is known to be zero, switch this to a SRL.
   if (DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
@@ -5730,7 +6023,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   if (isNullConstantOrNullSplatConstant(N0))
     return N0;
   // fold (srl x, c >= size(x)) -> undef
-  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+    return Val->getAPIntValue().uge(OpSizeInBits);
+  };
+  if (matchUnaryPredicate(N1, MatchShiftTooBig))
     return DAG.getUNDEF(VT);
   // fold (srl x, 0) -> x
   if (N1C && N1C->isNullValue())
@@ -5745,20 +6042,29 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     return DAG.getConstant(0, SDLoc(N), VT);
 
   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SRL) {
-    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
-      SDLoc DL(N);
-      APInt c1 = N0C1->getAPIntValue();
-      APInt c2 = N1C->getAPIntValue();
+  if (N0.getOpcode() == ISD::SRL) {
+    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+                                          ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return (c1 + c2).uge(OpSizeInBits);
+    };
+    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+      return DAG.getConstant(0, SDLoc(N), VT);
 
-      APInt Sum = c1 + c2;
-      if (Sum.uge(OpSizeInBits))
-        return DAG.getConstant(0, DL, VT);
-
-      return DAG.getNode(
-          ISD::SRL, DL, VT, N0.getOperand(0),
-          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+                                       ConstantSDNode *RHS) {
+      APInt c1 = LHS->getAPIntValue();
+      APInt c2 = RHS->getAPIntValue();
+      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+      return (c1 + c2).ult(OpSizeInBits);
+    };
+    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+      SDLoc DL(N);
+      EVT ShiftVT = N1.getValueType();
+      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
     }
   }
 
@@ -6008,7 +6314,6 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
   return SDValue();
 }
 
-
 /// \brief Generate Min/Max node
 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                    SDValue RHS, SDValue True, SDValue False,
@@ -6096,7 +6401,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
     // For any constants that differ by 1, we can transform the select into an
     // extend and add. Use a target hook because some targets may prefer to
     // transform in the other direction.
-    if (TLI.convertSelectOfConstantsToMath()) {
+    if (TLI.convertSelectOfConstantsToMath(VT)) {
       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
         if (VT != MVT::i1)
@@ -6371,7 +6676,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 }
 
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
-
   if (Level >= AfterLegalizeTypes)
     return SDValue();
 
@@ -6432,7 +6736,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-
   if (Level >= AfterLegalizeTypes)
     return SDValue();
 
@@ -6447,7 +6750,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   // and enables future optimizations (e.g. min/max pattern matching on X86).
   if (Mask.getOpcode() == ISD::SETCC) {
-
     // Check if any splitting is required.
     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
         TargetLowering::TypeSplitVector)
@@ -6504,11 +6806,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
-
   if (Level >= AfterLegalizeTypes)
     return SDValue();
 
-  MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
+  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
   SDValue Mask = MGT->getMask();
   SDLoc DL(N);
 
@@ -6581,7 +6882,6 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
-
   if (Level >= AfterLegalizeTypes)
     return SDValue();
 
@@ -6593,7 +6893,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   // and enables future optimizations (e.g. min/max pattern matching on X86).
-
   if (Mask.getOpcode() == ISD::SETCC) {
     EVT VT = N->getValueType(0);
 
@@ -6665,6 +6964,57 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   return SDValue();
 }
 
+/// A vector select of 2 constant vectors can be simplified to math/logic to
+/// avoid a variable select instruction and possibly avoid constant loads.
+SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
+  SDValue Cond = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
+      !TLI.convertSelectOfConstantsToMath(VT) ||
+      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
+      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
+    return SDValue();
+
+  // Check if we can use the condition value to increment/decrement a single
+  // constant value. This simplifies a select to an add and removes a constant
+  // load/materialization from the general case.
+  bool AllAddOne = true;
+  bool AllSubOne = true;
+  unsigned Elts = VT.getVectorNumElements();
+  for (unsigned i = 0; i != Elts; ++i) {
+    SDValue N1Elt = N1.getOperand(i);
+    SDValue N2Elt = N2.getOperand(i);
+    if (N1Elt.isUndef() || N2Elt.isUndef())
+      continue;
+
+    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
+    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
+    if (C1 != C2 + 1)
+      AllAddOne = false;
+    if (C1 != C2 - 1)
+      AllSubOne = false;
+  }
+
+  // Further simplifications for the extra-special cases where the constants are
+  // all 0 or all -1 should be implemented as folds of these patterns.
+  SDLoc DL(N);
+  if (AllAddOne || AllSubOne) {
+    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
+    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
+    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
+    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
+  }
+
+  // The general case for select-of-constants:
+  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
+  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
+  // leave that to a machine-specific pass.
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -6729,6 +7079,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
       return CV;
   }
 
+  if (SDValue V = foldVSelectOfConstants(N))
+    return V;
+
   return SDValue();
 }
 
@@ -7243,8 +7596,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
                                     SDLoc(N0.getOperand(0)),
                                     N0.getOperand(0).getValueType(), ExtLoad);
         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+        bool NoReplaceTruncAnd = !N0.hasOneUse();
         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
         CombineTo(N, And);
+        // If N0 has multiple uses, change other uses as well.
+        if (NoReplaceTruncAnd) {
+          SDValue TruncAnd =
+              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+          CombineTo(N0.getNode(), TruncAnd);
+        }
         if (NoReplaceTrunc)
           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
         else
@@ -7307,7 +7667,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
       return SCC;
 
-    if (!VT.isVector()) {
+    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
       EVT SetCCVT = getSetCCResultType(N00VT);
       // Don't do this transform for i1 because there's a select transform
       // that would reverse it.
@@ -7399,20 +7759,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
   }
 
-  // fold (zext (truncate (load x))) -> (zext (smaller load x))
-  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
-  if (N0.getOpcode() == ISD::TRUNCATE) {
-    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
-      SDNode *oye = N0.getOperand(0).getNode();
-      if (NarrowLoad.getNode() != N0.getNode()) {
-        CombineTo(N0.getNode(), NarrowLoad);
-        // CombineTo deleted the truncate, if needed, but not what's under it.
-        AddToWorklist(oye);
-      }
-      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
-    }
-  }
-
   // fold (zext (truncate x)) -> (and x, mask)
   if (N0.getOpcode() == ISD::TRUNCATE) {
     // fold (zext (truncate (load x))) -> (zext (smaller load x))
@@ -7445,7 +7791,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
       AddToWorklist(Op.getNode());
-      return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+      // We may safely transfer the debug info describing the truncate node over
+      // to the equivalent and operation.
+      DAG.transferDbgValues(N0, And);
+      return And;
     }
   }
 
@@ -7522,11 +7872,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       if (!N0.hasOneUse()) {
         if (N0.getOpcode() == ISD::AND) {
           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
-          auto NarrowLoad = false;
           EVT LoadResultTy = AndC->getValueType(0);
-          EVT ExtVT, LoadedVT;
-          if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
-                               NarrowLoad))
+          EVT ExtVT;
+          if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT))
             DoXform = false;
         }
         if (DoXform)
@@ -7547,8 +7895,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                     SDLoc(N0.getOperand(0)),
                                     N0.getOperand(0).getValueType(), ExtLoad);
         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
+        bool NoReplaceTruncAnd = !N0.hasOneUse();
         bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
         CombineTo(N, And);
+        // If N0 has multiple uses, change other uses as well.
+        if (NoReplaceTruncAnd) {
+          SDValue TruncAnd =
+              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+          CombineTo(N0.getNode(), TruncAnd);
+        }
         if (NoReplaceTrunc)
           DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
         else
@@ -7604,10 +7959,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
       // If the desired elements are smaller or larger than the source
       // elements we can use a matching integer vector type and then
       // truncate/sign extend.
-      EVT MatchingElementType = EVT::getIntegerVT(
-          *DAG.getContext(), N00VT.getScalarSizeInBits());
-      EVT MatchingVectorType = EVT::getVectorVT(
-          *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
+      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
       SDValue VsetCC =
           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1), N0.getOperand(2));
@@ -7731,7 +8083,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
                       ISD::ANY_EXTEND);
       // If the load value is used only by N, replace it via CombineTo N.
       bool NoReplaceTrunc = N0.hasOneUse();
-      CombineTo(N, ExtLoad); 
+      CombineTo(N, ExtLoad);
       if (NoReplaceTrunc)
         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
       else
@@ -7769,13 +8121,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
     // aext(setcc) -> aext(vsetcc)
     // Only do this before legalize for now.
     if (VT.isVector() && !LegalOperations) {
-      EVT N0VT = N0.getOperand(0).getValueType();
-        // We know that the # elements of the results is the same as the
-        // # elements of the compare (and the # elements of the compare result
-        // for that matter).  Check to see that they are the same size.  If so,
-        // we know that the element size of the sext'd result matches the
-        // element size of the compare operands.
-      if (VT.getSizeInBits() == N0VT.getSizeInBits())
+      EVT N00VT = N0.getOperand(0).getValueType();
+      if (getSetCCResultType(N00VT) == N0.getValueType())
+        return SDValue();
+
+      // We know that the # elements of the results is the same as the
+      // # elements of the compare (and the # elements of the compare result
+      // for that matter).  Check to see that they are the same size.  If so,
+      // we know that the element size of the sext'd result matches the
+      // element size of the compare operands.
+      if (VT.getSizeInBits() == N00VT.getSizeInBits())
         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                              N0.getOperand(1),
                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -7783,7 +8138,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       // elements we can use a matching integer vector type and then
       // truncate/any extend
       else {
-        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
+        EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
         SDValue VsetCC =
           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                         N0.getOperand(1),
@@ -7804,77 +8159,47 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+SDValue DAGCombiner::visitAssertExt(SDNode *N) {
+  unsigned Opcode = N->getOpcode();
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  EVT EVT = cast<VTSDNode>(N1)->getVT();
+  EVT AssertVT = cast<VTSDNode>(N1)->getVT();
 
-  // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
-  if (N0.getOpcode() == ISD::AssertZext &&
-      EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
+  if (N0.getOpcode() == Opcode &&
+      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
     return N0;
 
-  return SDValue();
-}
+  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+      N0.getOperand(0).getOpcode() == Opcode) {
+    // We have an assert, truncate, assert sandwich. Make one stronger assert
+    // by asserting on the smallest asserted type to the larger source type.
+    // This eliminates the later assert:
+    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
+    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+    SDValue BigA = N0.getOperand(0);
+    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+           "Asserting zero/sign-extended bits to a type larger than the "
+           "truncated destination does not provide information");
 
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used.  If so, return the simpler operand,
-/// otherwise return a null SDValue.
-///
-/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
-/// simplify nodes with multiple uses more aggressively.)
-SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
-  switch (V.getOpcode()) {
-  default: break;
-  case ISD::Constant: {
-    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
-    assert(CV && "Const value should be ConstSDNode.");
-    const APInt &CVal = CV->getAPIntValue();
-    APInt NewVal = CVal & Mask;
-    if (NewVal != CVal)
-      return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
-    break;
+    SDLoc DL(N);
+    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
+    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
+    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+                                    BigA.getOperand(0), MinAssertVTVal);
+    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
   }
-  case ISD::OR:
-  case ISD::XOR:
-    // If the LHS or RHS don't contribute bits to the or, drop them.
-    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
-      return V.getOperand(1);
-    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
-      return V.getOperand(0);
-    break;
-  case ISD::SRL:
-    // Only look at single-use SRLs.
-    if (!V.getNode()->hasOneUse())
-      break;
-    if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
-      // See if we can recursively simplify the LHS.
-      unsigned Amt = RHSC->getZExtValue();
 
-      // Watch out for shift count overflow though.
-      if (Amt >= Mask.getBitWidth()) break;
-      APInt NewMask = Mask << Amt;
-      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
-        return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
-                           SimplifyLHS, V.getOperand(1));
-    }
-    break;
-  case ISD::AND: {
-    // X & -1 -> X (ignoring bits which aren't demanded).
-    ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
-    if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
-      return V.getOperand(0);
-    break;
-  }
-  }
   return SDValue();
 }
 
 /// If the result of a wider load is shifted to right of N  bits and then
 /// truncated to a narrower type and where N is a multiple of number of bits of
 /// the narrower type, transform it to a narrower load from address + N / num of
-/// bits of new type. If the result is to be extended, also fold the extension
-/// to form a extending load.
+/// bits of new type. Also narrow the load if the result is masked with an AND
+/// to effectively produce a smaller type. If the result is to be extended, also
+/// fold the extension to form a extending load.
 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   unsigned Opc = N->getOpcode();
 
@@ -7893,28 +8218,40 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     ExtType = ISD::SEXTLOAD;
     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   } else if (Opc == ISD::SRL) {
-    // Another special-case: SRL is basically zero-extending a narrower value.
+    // Another special-case: SRL is basically zero-extending a narrower value,
+    // or it maybe shifting a higher subword, half or byte into the lowest
+    // bits.
     ExtType = ISD::ZEXTLOAD;
     N0 = SDValue(N, 0);
-    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (!N01) return SDValue();
-    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
-                              VT.getSizeInBits() - N01->getZExtValue());
-  }
-  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
-    return SDValue();
 
-  unsigned EVTBits = ExtVT.getSizeInBits();
+    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N01 || !LN0)
+      return SDValue();
 
-  // Do not generate loads of non-round integer types since these can
-  // be expensive (and would be wrong if the type is not byte sized).
-  if (!ExtVT.isRound())
-    return SDValue();
+    uint64_t ShiftAmt = N01->getZExtValue();
+    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+    else
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+                                VT.getSizeInBits() - ShiftAmt);
+  } else if (Opc == ISD::AND) {
+    // An AND with a constant mask is the same as a truncate + zero-extend.
+    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!AndC || !AndC->getAPIntValue().isMask())
+      return SDValue();
+
+    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+    ExtType = ISD::ZEXTLOAD;
+    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+  }
 
   unsigned ShAmt = 0;
   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
       ShAmt = N01->getZExtValue();
+      unsigned EVTBits = ExtVT.getSizeInBits();
       // Is the shift amount a multiple of size of VT?
       if ((ShAmt & (EVTBits-1)) == 0) {
         N0 = N0.getOperand(0);
@@ -7951,42 +8288,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     }
   }
 
-  // If we haven't found a load, we can't narrow it.  Don't transform one with
-  // multiple uses, this would require adding a new load.
-  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+  // If we haven't found a load, we can't narrow it.
+  if (!isa<LoadSDNode>(N0))
     return SDValue();
 
-  // Don't change the width of a volatile load.
   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-  if (LN0->isVolatile())
-    return SDValue();
-
-  // Verify that we are actually reducing a load width here.
-  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
-    return SDValue();
-
-  // For the transform to be legal, the load must produce only two values
-  // (the value loaded and the chain).  Don't transform a pre-increment
-  // load, for example, which produces an extra value.  Otherwise the
-  // transformation is not equivalent, and the downstream logic to replace
-  // uses gets things wrong.
-  if (LN0->getNumValues() > 2)
-    return SDValue();
-
-  // If the load that we're shrinking is an extload and we're not just
-  // discarding the extension we can't simply shrink the load. Bail.
-  // TODO: It would be possible to merge the extensions in some cases.
-  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
-      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
-    return SDValue();
-
-  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
-    return SDValue();
-
-  EVT PtrType = N0.getOperand(1).getValueType();
-
-  if (PtrType == MVT::Untyped || PtrType.isExtended())
-    // It's not possible to generate a constant of extended or untyped type.
+  if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
     return SDValue();
 
   // For big endian targets, we need to adjust the offset to the pointer to
@@ -7997,6 +8304,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
   }
 
+  EVT PtrType = N0.getOperand(1).getValueType();
   uint64_t PtrOff = ShAmt / 8;
   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
   SDLoc DL(LN0);
@@ -8130,10 +8438,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   }
 
   // fold (sext_inreg (extload x)) -> (sextload x)
+  // If sextload is not supported by target, we can only do the combine when
+  // load has one use. Doing otherwise can block folding the extload with other
+  // extends that the target does support.
   if (ISD::isEXTLoad(N0.getNode()) &&
       ISD::isUNINDEXEDLoad(N0.getNode()) &&
       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+        N0.hasOneUse()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -8208,12 +8520,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // noop truncate
   if (N0.getValueType() == N->getValueType(0))
     return N0;
-  // fold (truncate c1) -> c1
-  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
-    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+
   // fold (truncate (truncate x)) -> (truncate x)
   if (N0.getOpcode() == ISD::TRUNCATE)
     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+
+  // fold (truncate c1) -> c1
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+    if (C.getNode() != N)
+      return C;
+  }
+
   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
@@ -8245,7 +8563,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // we need to be more careful about the vector instructions that we generate.
   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
-
     EVT VecTy = N0.getOperand(0).getValueType();
     EVT ExTy = N0.getValueType();
     EVT TrTy = N->getValueType(0);
@@ -8311,7 +8628,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
       N0.getOperand(0).hasOneUse()) {
-
     SDValue BuildVect = N0.getOperand(0);
     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
     EVT TruncVecEltTy = VT.getVectorElementType();
@@ -8340,9 +8656,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // Currently we only perform this optimization on scalars because vectors
   // may have different active low bits.
   if (!VT.isVector()) {
-    if (SDValue Shorter =
-            GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
-                                                     VT.getSizeInBits())))
+    APInt Mask =
+        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
   }
 
@@ -8413,7 +8729,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // Fold truncate of a bitcast of a vector to an extract of the low vector
   // element.
   //
-  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
+  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
     SDValue VecSrc = N0.getOperand(0);
     EVT SrcVT = VecSrc.getValueType();
@@ -8423,8 +8739,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       SDLoc SL(N);
 
       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
-                         VecSrc, DAG.getConstant(0, SL, IdxVT));
+                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
     }
   }
 
@@ -8466,11 +8783,18 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
 
   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+
+  // A BUILD_PAIR is always having the least significant part in elt 0 and the
+  // most significant part in elt 1. So when combining into one large load, we
+  // need to consider the endianness.
+  if (DAG.getDataLayout().isBigEndian())
+    std::swap(LD1, LD2);
+
   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
       LD1->getAddressSpace() != LD2->getAddressSpace())
     return SDValue();
   EVT LD1VT = LD1->getValueType(0);
-  unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
+  unsigned LD1Bytes = LD1VT.getStoreSize();
   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
     unsigned Align = LD1->getAlignment();
@@ -8751,12 +9075,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       if (Op.getOpcode() == ISD::BITCAST &&
           Op.getOperand(0).getValueType() == VT)
         return SDValue(Op.getOperand(0));
-      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
         return DAG.getBitcast(VT, Op);
       return SDValue();
     };
 
+    // FIXME: If either input vector is bitcast, try to convert the shuffle to
+    // the result type of this bitcast. This would eliminate at least one
+    // bitcast. See the transform in InstCombine.
     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
     if (!(SV0 && SV1))
@@ -8949,7 +9276,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   // Always prefer FMAD to FMA for precision.
   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
-  bool LookThroughFPExt = TLI.isFPExtFree(VT);
 
   // Is the node an FMUL and contractable either due to global flags or
   // SDNodeFlags.
@@ -8979,28 +9305,31 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   }
 
   // Look through FP_EXTEND nodes to do more combining.
-  if (LookThroughFPExt) {
-    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
-    if (N0.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N00 = N0.getOperand(0);
-      if (isContractableFMUL(N00))
-        return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N00.getOperand(0)),
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N00.getOperand(1)), N1);
+
+  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+  if (N0.getOpcode() == ISD::FP_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    if (isContractableFMUL(N00) &&
+        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+      return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N00.getOperand(0)),
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N00.getOperand(1)), N1);
     }
+  }
 
-    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
-    // Note: Commutes FADD operands.
-    if (N1.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N10 = N1.getOperand(0);
-      if (isContractableFMUL(N10))
-        return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N10.getOperand(0)),
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N10.getOperand(1)), N0);
+  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+  // Note: Commutes FADD operands.
+  if (N1.getOpcode() == ISD::FP_EXTEND) {
+    SDValue N10 = N1.getOperand(0);
+    if (isContractableFMUL(N10) &&
+        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+      return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N10.getOperand(0)),
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N10.getOperand(1)), N0);
     }
   }
 
@@ -9036,80 +9365,87 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
                                      N0));
     }
 
-    if (LookThroughFPExt) {
-      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
-      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
-      auto FoldFAddFMAFPExtFMul = [&] (
-          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
-        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
-                           DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
-                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
-                                       Z));
-      };
-      if (N0.getOpcode() == PreferredFusedOpcode) {
-        SDValue N02 = N0.getOperand(2);
-        if (N02.getOpcode() == ISD::FP_EXTEND) {
-          SDValue N020 = N02.getOperand(0);
-          if (isContractableFMUL(N020))
-            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
-                                        N020.getOperand(0), N020.getOperand(1),
-                                        N1);
+
+    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
+    auto FoldFAddFMAFPExtFMul = [&] (
+      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+                         DAG.getNode(PreferredFusedOpcode, SL, VT,
+                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+                                     Z));
+    };
+    if (N0.getOpcode() == PreferredFusedOpcode) {
+      SDValue N02 = N0.getOperand(2);
+      if (N02.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N020 = N02.getOperand(0);
+        if (isContractableFMUL(N020) &&
+            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+                                      N020.getOperand(0), N020.getOperand(1),
+                                      N1);
         }
       }
+    }
 
-      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
-      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
-      // FIXME: This turns two single-precision and one double-precision
-      // operation into two double-precision operations, which might not be
-      // interesting for all targets, especially GPUs.
-      auto FoldFAddFPExtFMAFMul = [&] (
-          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
-        return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
-                           DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
-                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
-                                       Z));
-      };
-      if (N0.getOpcode() == ISD::FP_EXTEND) {
-        SDValue N00 = N0.getOperand(0);
-        if (N00.getOpcode() == PreferredFusedOpcode) {
-          SDValue N002 = N00.getOperand(2);
-          if (isContractableFMUL(N002))
-            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
-                                        N002.getOperand(0), N002.getOperand(1),
-                                        N1);
+    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+    // FIXME: This turns two single-precision and one double-precision
+    // operation into two double-precision operations, which might not be
+    // interesting for all targets, especially GPUs.
+    auto FoldFAddFPExtFMAFMul = [&] (
+      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+      return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+                         DAG.getNode(PreferredFusedOpcode, SL, VT,
+                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+                                     Z));
+    };
+    if (N0.getOpcode() == ISD::FP_EXTEND) {
+      SDValue N00 = N0.getOperand(0);
+      if (N00.getOpcode() == PreferredFusedOpcode) {
+        SDValue N002 = N00.getOperand(2);
+        if (isContractableFMUL(N002) &&
+            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+                                      N002.getOperand(0), N002.getOperand(1),
+                                      N1);
         }
       }
+    }
 
-      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
-      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
-      if (N1.getOpcode() == PreferredFusedOpcode) {
-        SDValue N12 = N1.getOperand(2);
-        if (N12.getOpcode() == ISD::FP_EXTEND) {
-          SDValue N120 = N12.getOperand(0);
-          if (isContractableFMUL(N120))
-            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
-                                        N120.getOperand(0), N120.getOperand(1),
-                                        N0);
+    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
+    if (N1.getOpcode() == PreferredFusedOpcode) {
+      SDValue N12 = N1.getOperand(2);
+      if (N12.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N120 = N12.getOperand(0);
+        if (isContractableFMUL(N120) &&
+            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+                                      N120.getOperand(0), N120.getOperand(1),
+                                      N0);
         }
       }
+    }
 
-      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
-      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
-      // FIXME: This turns two single-precision and one double-precision
-      // operation into two double-precision operations, which might not be
-      // interesting for all targets, especially GPUs.
-      if (N1.getOpcode() == ISD::FP_EXTEND) {
-        SDValue N10 = N1.getOperand(0);
-        if (N10.getOpcode() == PreferredFusedOpcode) {
-          SDValue N102 = N10.getOperand(2);
-          if (isContractableFMUL(N102))
-            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
-                                        N102.getOperand(0), N102.getOperand(1),
-                                        N0);
+    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+    // FIXME: This turns two single-precision and one double-precision
+    // operation into two double-precision operations, which might not be
+    // interesting for all targets, especially GPUs.
+    if (N1.getOpcode() == ISD::FP_EXTEND) {
+      SDValue N10 = N1.getOperand(0);
+      if (N10.getOpcode() == PreferredFusedOpcode) {
+        SDValue N102 = N10.getOperand(2);
+        if (isContractableFMUL(N102) &&
+            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+                                      N102.getOperand(0), N102.getOperand(1),
+                                      N0);
         }
       }
     }
@@ -9151,7 +9487,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   // Always prefer FMAD to FMA for precision.
   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
-  bool LookThroughFPExt = TLI.isFPExtFree(VT);
 
   // Is the node an FMUL and contractable either due to global flags or
   // SDNodeFlags.
@@ -9187,79 +9522,83 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   }
 
   // Look through FP_EXTEND nodes to do more combining.
-  if (LookThroughFPExt) {
-    // fold (fsub (fpext (fmul x, y)), z)
-    //   -> (fma (fpext x), (fpext y), (fneg z))
-    if (N0.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N00 = N0.getOperand(0);
-      if (isContractableFMUL(N00))
-        return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N00.getOperand(0)),
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N00.getOperand(1)),
-                           DAG.getNode(ISD::FNEG, SL, VT, N1));
+
+  // fold (fsub (fpext (fmul x, y)), z)
+  //   -> (fma (fpext x), (fpext y), (fneg z))
+  if (N0.getOpcode() == ISD::FP_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    if (isContractableFMUL(N00) &&
+        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+      return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N00.getOperand(0)),
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N00.getOperand(1)),
+                         DAG.getNode(ISD::FNEG, SL, VT, N1));
     }
+  }
 
-    // fold (fsub x, (fpext (fmul y, z)))
-    //   -> (fma (fneg (fpext y)), (fpext z), x)
-    // Note: Commutes FSUB operands.
-    if (N1.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N10 = N1.getOperand(0);
-      if (isContractableFMUL(N10))
-        return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                           DAG.getNode(ISD::FNEG, SL, VT,
+  // fold (fsub x, (fpext (fmul y, z)))
+  //   -> (fma (fneg (fpext y)), (fpext z), x)
+  // Note: Commutes FSUB operands.
+  if (N1.getOpcode() == ISD::FP_EXTEND) {
+    SDValue N10 = N1.getOperand(0);
+    if (isContractableFMUL(N10) &&
+        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+      return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                         DAG.getNode(ISD::FNEG, SL, VT,
+                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                 N10.getOperand(0))),
+                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                     N10.getOperand(1)),
+                         N0);
+    }
+  }
+
+  // fold (fsub (fpext (fneg (fmul, x, y))), z)
+  //   -> (fneg (fma (fpext x), (fpext y), z))
+  // Note: This could be removed with appropriate canonicalization of the
+  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+  // from implementing the canonicalization in visitFSUB.
+  if (N0.getOpcode() == ISD::FP_EXTEND) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getOpcode() == ISD::FNEG) {
+      SDValue N000 = N00.getOperand(0);
+      if (isContractableFMUL(N000) &&
+          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+        return DAG.getNode(ISD::FNEG, SL, VT,
+                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                   N10.getOperand(0))),
-                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                       N10.getOperand(1)),
-                           N0);
-    }
-
-    // fold (fsub (fpext (fneg (fmul, x, y))), z)
-    //   -> (fneg (fma (fpext x), (fpext y), z))
-    // Note: This could be removed with appropriate canonicalization of the
-    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
-    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
-    // from implementing the canonicalization in visitFSUB.
-    if (N0.getOpcode() == ISD::FP_EXTEND) {
-      SDValue N00 = N0.getOperand(0);
-      if (N00.getOpcode() == ISD::FNEG) {
-        SDValue N000 = N00.getOperand(0);
-        if (isContractableFMUL(N000)) {
-          return DAG.getNode(ISD::FNEG, SL, VT,
-                             DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N000.getOperand(0)),
-                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N000.getOperand(1)),
-                                         N1));
-        }
+                                                   N000.getOperand(0)),
+                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                   N000.getOperand(1)),
+                                       N1));
       }
     }
+  }
 
-    // fold (fsub (fneg (fpext (fmul, x, y))), z)
-    //   -> (fneg (fma (fpext x)), (fpext y), z)
-    // Note: This could be removed with appropriate canonicalization of the
-    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
-    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
-    // from implementing the canonicalization in visitFSUB.
-    if (N0.getOpcode() == ISD::FNEG) {
-      SDValue N00 = N0.getOperand(0);
-      if (N00.getOpcode() == ISD::FP_EXTEND) {
-        SDValue N000 = N00.getOperand(0);
-        if (isContractableFMUL(N000)) {
-          return DAG.getNode(ISD::FNEG, SL, VT,
-                             DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N000.getOperand(0)),
-                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N000.getOperand(1)),
-                                         N1));
-        }
+  // fold (fsub (fneg (fpext (fmul, x, y))), z)
+  //   -> (fneg (fma (fpext x)), (fpext y), z)
+  // Note: This could be removed with appropriate canonicalization of the
+  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
+  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+  // from implementing the canonicalization in visitFSUB.
+  if (N0.getOpcode() == ISD::FNEG) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getOpcode() == ISD::FP_EXTEND) {
+      SDValue N000 = N00.getOperand(0);
+      if (isContractableFMUL(N000) &&
+          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+        return DAG.getNode(ISD::FNEG, SL, VT,
+                           DAG.getNode(PreferredFusedOpcode, SL, VT,
+                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                   N000.getOperand(0)),
+                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                   N000.getOperand(1)),
+                                       N1));
       }
     }
-
   }
 
   // More folding opportunities when target permits.
@@ -9298,102 +9637,108 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
                                      N21, N0));
     }
 
-    if (LookThroughFPExt) {
-      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
-      //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
-      if (N0.getOpcode() == PreferredFusedOpcode) {
-        SDValue N02 = N0.getOperand(2);
-        if (N02.getOpcode() == ISD::FP_EXTEND) {
-          SDValue N020 = N02.getOperand(0);
-          if (isContractableFMUL(N020))
-            return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                               N0.getOperand(0), N0.getOperand(1),
-                               DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                       N020.getOperand(0)),
-                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                       N020.getOperand(1)),
-                                           DAG.getNode(ISD::FNEG, SL, VT,
-                                                       N1)));
-        }
-      }
 
-      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
-      //   -> (fma (fpext x), (fpext y),
-      //           (fma (fpext u), (fpext v), (fneg z)))
-      // FIXME: This turns two single-precision and one double-precision
-      // operation into two double-precision operations, which might not be
-      // interesting for all targets, especially GPUs.
-      if (N0.getOpcode() == ISD::FP_EXTEND) {
-        SDValue N00 = N0.getOperand(0);
-        if (N00.getOpcode() == PreferredFusedOpcode) {
-          SDValue N002 = N00.getOperand(2);
-          if (isContractableFMUL(N002))
-            return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                           N00.getOperand(0)),
-                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                           N00.getOperand(1)),
-                               DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                       N002.getOperand(0)),
-                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                       N002.getOperand(1)),
-                                           DAG.getNode(ISD::FNEG, SL, VT,
-                                                       N1)));
-        }
-      }
-
-      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
-      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
-      if (N1.getOpcode() == PreferredFusedOpcode &&
-        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
-        SDValue N120 = N1.getOperand(2).getOperand(0);
-        if (isContractableFMUL(N120)) {
-          SDValue N1200 = N120.getOperand(0);
-          SDValue N1201 = N120.getOperand(1);
+    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
+    if (N0.getOpcode() == PreferredFusedOpcode) {
+      SDValue N02 = N0.getOperand(2);
+      if (N02.getOpcode() == ISD::FP_EXTEND) {
+        SDValue N020 = N02.getOperand(0);
+        if (isContractableFMUL(N020) &&
+            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
           return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
-                             N1.getOperand(1),
+                             N0.getOperand(0), N0.getOperand(1),
                              DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                         DAG.getNode(ISD::FNEG, SL, VT,
-                                             DAG.getNode(ISD::FP_EXTEND, SL,
-                                                         VT, N1200)),
                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N1201),
-                                         N0));
+                                                     N020.getOperand(0)),
+                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                     N020.getOperand(1)),
+                                         DAG.getNode(ISD::FNEG, SL, VT,
+                                                     N1)));
         }
       }
+    }
 
-      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
-      //   -> (fma (fneg (fpext y)), (fpext z),
-      //           (fma (fneg (fpext u)), (fpext v), x))
-      // FIXME: This turns two single-precision and one double-precision
-      // operation into two double-precision operations, which might not be
-      // interesting for all targets, especially GPUs.
-      if (N1.getOpcode() == ISD::FP_EXTEND &&
-        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
-        SDValue N100 = N1.getOperand(0).getOperand(0);
-        SDValue N101 = N1.getOperand(0).getOperand(1);
-        SDValue N102 = N1.getOperand(0).getOperand(2);
-        if (isContractableFMUL(N102)) {
-          SDValue N1020 = N102.getOperand(0);
-          SDValue N1021 = N102.getOperand(1);
+    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+    //   -> (fma (fpext x), (fpext y),
+    //           (fma (fpext u), (fpext v), (fneg z)))
+    // FIXME: This turns two single-precision and one double-precision
+    // operation into two double-precision operations, which might not be
+    // interesting for all targets, especially GPUs.
+    if (N0.getOpcode() == ISD::FP_EXTEND) {
+      SDValue N00 = N0.getOperand(0);
+      if (N00.getOpcode() == PreferredFusedOpcode) {
+        SDValue N002 = N00.getOperand(2);
+        if (isContractableFMUL(N002) &&
+            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
           return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                             DAG.getNode(ISD::FNEG, SL, VT,
-                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N100)),
-                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                         N00.getOperand(0)),
+                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                         N00.getOperand(1)),
                              DAG.getNode(PreferredFusedOpcode, SL, VT,
-                                         DAG.getNode(ISD::FNEG, SL, VT,
-                                             DAG.getNode(ISD::FP_EXTEND, SL,
-                                                         VT, N1020)),
                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                                     N1021),
-                                         N0));
+                                                     N002.getOperand(0)),
+                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                     N002.getOperand(1)),
+                                         DAG.getNode(ISD::FNEG, SL, VT,
+                                                     N1)));
         }
       }
     }
+
+    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+    if (N1.getOpcode() == PreferredFusedOpcode &&
+        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+      SDValue N120 = N1.getOperand(2).getOperand(0);
+      if (isContractableFMUL(N120) &&
+          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+        SDValue N1200 = N120.getOperand(0);
+        SDValue N1201 = N120.getOperand(1);
+        return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+                           N1.getOperand(1),
+                           DAG.getNode(PreferredFusedOpcode, SL, VT,
+                                       DAG.getNode(ISD::FNEG, SL, VT,
+                                                   DAG.getNode(ISD::FP_EXTEND, SL,
+                                                               VT, N1200)),
+                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                   N1201),
+                                       N0));
+      }
+    }
+
+    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+    //   -> (fma (fneg (fpext y)), (fpext z),
+    //           (fma (fneg (fpext u)), (fpext v), x))
+    // FIXME: This turns two single-precision and one double-precision
+    // operation into two double-precision operations, which might not be
+    // interesting for all targets, especially GPUs.
+    if (N1.getOpcode() == ISD::FP_EXTEND &&
+        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+      SDValue CvtSrc = N1.getOperand(0);
+      SDValue N100 = CvtSrc.getOperand(0);
+      SDValue N101 = CvtSrc.getOperand(1);
+      SDValue N102 = CvtSrc.getOperand(2);
+      if (isContractableFMUL(N102) &&
+          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+        SDValue N1020 = N102.getOperand(0);
+        SDValue N1021 = N102.getOperand(1);
+        return DAG.getNode(PreferredFusedOpcode, SL, VT,
+                           DAG.getNode(ISD::FNEG, SL, VT,
+                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                   N100)),
+                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+                           DAG.getNode(PreferredFusedOpcode, SL, VT,
+                                       DAG.getNode(ISD::FNEG, SL, VT,
+                                                   DAG.getNode(ISD::FP_EXTEND, SL,
+                                                               VT, N1020)),
+                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
+                                                   N1021),
+                                       N0));
+      }
+    }
   }
 
   return SDValue();
@@ -9959,6 +10304,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
       // TODO: The FMA node should have flags that propagate to this node.
       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
     }
+
+    // fma (fneg x), K, y -> fma x -K, y
+    if (N0.getOpcode() == ISD::FNEG &&
+        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+    }
   }
 
   if (Options.UnsafeFPMath) {
@@ -10081,8 +10434,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
           (!LegalOperations ||
            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
            // backend)... we should handle this gracefully after Legalize.
-           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
-           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
            TLI.isFPImmLegal(Recip, VT)))
         return DAG.getNode(ISD::FMUL, DL, VT, N0,
                            DAG.getConstantFP(Recip, DL, VT), Flags);
@@ -10264,7 +10617,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       // ...but only if the target supports immediate floating-point values
       (!LegalOperations ||
-       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
 
   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -10282,7 +10635,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
         !VT.isVector() &&
         (!LegalOperations ||
-         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
       SDLoc DL(N);
       SDValue Ops[] =
         { N0.getOperand(0), N0.getOperand(1),
@@ -10296,7 +10649,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
         (!LegalOperations ||
-         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
       SDLoc DL(N);
       SDValue Ops[] =
         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
@@ -10318,7 +10671,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       // ...but only if the target supports immediate floating-point values
       (!LegalOperations ||
-       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
 
   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -10333,10 +10686,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   // The next optimizations are desirable only if SELECT_CC can be lowered.
   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
-
     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
         (!LegalOperations ||
-         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
       SDLoc DL(N);
       SDValue Ops[] =
         { N0.getOperand(0), N0.getOperand(1),
@@ -10557,6 +10909,19 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   if (isConstantFPBuildVectorOrConstantFP(N0))
     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
 
+  // fold ftrunc (known rounded int x) -> x
+  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
+  // likely to be generated to extract integer from a rounded floating value.
+  switch (N0.getOpcode()) {
+  default: break;
+  case ISD::FRINT:
+  case ISD::FTRUNC:
+  case ISD::FNEARBYINT:
+  case ISD::FFLOOR:
+  case ISD::FCEIL:
+    return N0;
+  }
+
   return SDValue();
 }
 
@@ -11160,6 +11525,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   // Replace the uses of Ptr with uses of the updated base value.
   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
   deleteAndRecombine(Ptr.getNode());
+  AddToWorklist(Result.getNode());
 
   return true;
 }
@@ -11445,6 +11811,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
 }
 
 namespace {
+
 /// \brief Helper structure used to slice a load in smaller loads.
 /// Basically a slice is obtained from the following sequence:
 /// Origin = load Ty1, Base
@@ -11462,21 +11829,19 @@ struct LoadedSlice {
   struct Cost {
     /// Are we optimizing for code size.
     bool ForCodeSize;
+
     /// Various cost.
-    unsigned Loads;
-    unsigned Truncates;
-    unsigned CrossRegisterBanksCopies;
-    unsigned ZExts;
-    unsigned Shift;
+    unsigned Loads = 0;
+    unsigned Truncates = 0;
+    unsigned CrossRegisterBanksCopies = 0;
+    unsigned ZExts = 0;
+    unsigned Shift = 0;
 
-    Cost(bool ForCodeSize = false)
-        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
-          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
+    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
 
     /// \brief Get the cost of one isolated slice.
     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
-        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
-          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
+        : ForCodeSize(ForCodeSize), Loads(1) {
       EVT TruncType = LS.Inst->getValueType(0);
       EVT LoadedType = LS.getLoadedType();
       if (TruncType != LoadedType &&
@@ -11538,13 +11903,17 @@ struct LoadedSlice {
 
     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
   };
+
   // The last instruction that represent the slice. This should be a
   // truncate instruction.
   SDNode *Inst;
+
   // The original load instruction.
   LoadSDNode *Origin;
+
   // The right shift amount in bits from the original load.
   unsigned Shift;
+
   // The DAG from which Origin came from.
   // This is used to get some contextual information about legal types, etc.
   SelectionDAG *DAG;
@@ -11746,7 +12115,8 @@ struct LoadedSlice {
     return true;
   }
 };
-}
+
+} // end anonymous namespace
 
 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
 /// \p UsedBits looks like 0..0 1..1 0..0.
@@ -11804,7 +12174,6 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                 // Set the beginning of the pair.
                                                            First = Second) {
-
     Second = &LoadedSlices[CurrSlice];
 
     // If First is NULL, it means we start a new pair.
@@ -11935,7 +12304,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
     // will be across several bytes. We do not support that.
     unsigned Width = User->getValueSizeInBits(0);
     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
-      return 0;
+      return false;
 
     // Build the slice for this chain of computations.
     LoadedSlice LS(User, LD, Shift, &DAG);
@@ -12060,7 +12429,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   return Result;
 }
 
-
 /// Check to see if IVal is something that provides a value as specified by
 /// MaskInfo. If so, replace the specified store with a narrower store of
 /// truncated IVal.
@@ -12121,7 +12489,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
       .getNode();
 }
 
-
 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
 /// narrowing the load and store if it would end up being a win for performance
@@ -12325,7 +12692,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
 
   // Walk all the users of the constant with which we're multiplying.
   for (SDNode *Use : ConstNode->uses()) {
-
     if (Use == MulNode) // This use is the one we're on right now. Skip it.
       continue;
 
@@ -12376,6 +12742,12 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
   return false;
 }
 
+static SDValue peekThroughBitcast(SDValue V) {
+  while (V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+  return V;
+}
+
 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                          unsigned NumStores) {
   SmallVector<SDValue, 8> Chains;
@@ -12403,56 +12775,93 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
   if (NumStores < 2)
     return false;
 
-  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
-
   // The latest Node in the DAG.
   SDLoc DL(StoreNodes[0].MemNode);
 
-  SDValue StoredVal;
+  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+  unsigned SizeInBits = NumStores * ElementSizeBits;
+  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+  EVT StoreTy;
   if (UseVector) {
-    bool IsVec = MemVT.isVector();
-    unsigned Elts = NumStores;
-    if (IsVec) {
-      // When merging vector stores, get the total number of elements.
-      Elts *= MemVT.getVectorNumElements();
-    }
+    unsigned Elts = NumStores * NumMemElts;
     // Get the type for the merged vector store.
-    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
-    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+  } else
+    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
 
+  SDValue StoredVal;
+  if (UseVector) {
     if (IsConstantSrc) {
       SmallVector<SDValue, 8> BuildVector;
-      for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+      for (unsigned I = 0; I != NumStores; ++I) {
         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
         SDValue Val = St->getValue();
-        if (MemVT.getScalarType().isInteger())
-          if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
-            Val = DAG.getConstant(
-                (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
-                SDLoc(CFP), MemVT);
+        // If constant is of the wrong type, convert it now.
+        if (MemVT != Val.getValueType()) {
+          Val = peekThroughBitcast(Val);
+          // Deal with constants of wrong size.
+          if (ElementSizeBits != Val.getValueSizeInBits()) {
+            EVT IntMemVT =
+                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+            if (isa<ConstantFPSDNode>(Val)) {
+              // Not clear how to truncate FP values.
+              return false;
+            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+              Val = DAG.getConstant(C->getAPIntValue()
+                                        .zextOrTrunc(Val.getValueSizeInBits())
+                                        .zextOrTrunc(ElementSizeBits),
+                                    SDLoc(C), IntMemVT);
+          }
+          // Make sure correctly size type is the correct type.
+          Val = DAG.getBitcast(MemVT, Val);
+        }
         BuildVector.push_back(Val);
       }
-      StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
+      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+                                               : ISD::BUILD_VECTOR,
+                              DL, StoreTy, BuildVector);
     } else {
       SmallVector<SDValue, 8> Ops;
       for (unsigned i = 0; i < NumStores; ++i) {
         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
-        SDValue Val = St->getValue();
-        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
-        if (Val.getValueType() != MemVT)
-          return false;
+        SDValue Val = peekThroughBitcast(St->getValue());
+        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
+        // type MemVT. If the underlying value is not the correct
+        // type, but it is an extraction of an appropriate vector we
+        // can recast Val to be of the correct type. This may require
+        // converting between EXTRACT_VECTOR_ELT and
+        // EXTRACT_SUBVECTOR.
+        if ((MemVT != Val.getValueType()) &&
+            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
+          SDValue Vec = Val.getOperand(0);
+          EVT MemVTScalarTy = MemVT.getScalarType();
+          // We may need to add a bitcast here to get types to line up.
+          if (MemVTScalarTy != Vec.getValueType()) {
+            unsigned Elts = Vec.getValueType().getSizeInBits() /
+                            MemVTScalarTy.getSizeInBits();
+            EVT NewVecTy =
+                EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
+            Vec = DAG.getBitcast(NewVecTy, Vec);
+          }
+          auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
+                                        : ISD::EXTRACT_VECTOR_ELT;
+          Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
+        }
         Ops.push_back(Val);
       }
 
       // Build the extracted vector elements back into a vector.
-      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
-                              DL, Ty, Ops);    }
+      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+                                               : ISD::BUILD_VECTOR,
+                              DL, StoreTy, Ops);
+    }
   } else {
     // We should always use a vector store when merging extracted vector
     // elements, so this path implies a store of constants.
     assert(IsConstantSrc && "Merged vector elements should use vector store");
 
-    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
     APInt StoreInt(SizeInBits, 0);
 
     // Construct a single integer constant which is made of the smaller
@@ -12463,18 +12872,25 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
 
       SDValue Val = St->getValue();
-      StoreInt <<= ElementSizeBytes * 8;
+      StoreInt <<= ElementSizeBits;
       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
-        StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
+        StoreInt |= C->getAPIntValue()
+                        .zextOrTrunc(ElementSizeBits)
+                        .zextOrTrunc(SizeInBits);
       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
-        StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
+        StoreInt |= C->getValueAPF()
+                        .bitcastToAPInt()
+                        .zextOrTrunc(ElementSizeBits)
+                        .zextOrTrunc(SizeInBits);
+        // If fp truncation is necessary give up for now.
+        if (MemVT.getSizeInBits() != ElementSizeBits)
+          return false;
       } else {
         llvm_unreachable("Invalid constant element type");
       }
     }
 
     // Create the new Load and Store operations.
-    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
   }
 
@@ -12483,7 +12899,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
 
   // make sure we use trunc store if it's necessary to be legal.
   SDValue NewStore;
-  if (UseVector || !UseTrunc) {
+  if (!UseTrunc) {
     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                             FirstInChain->getPointerInfo(),
                             FirstInChain->getAlignment());
@@ -12517,6 +12933,7 @@ void DAGCombiner::getStoreMergeCandidates(
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
   EVT MemVT = St->getMemoryVT();
 
+  SDValue Val = peekThroughBitcast(St->getValue());
   // We must have a base and an offset.
   if (!BasePtr.getBase().getNode())
     return;
@@ -12525,47 +12942,62 @@ void DAGCombiner::getStoreMergeCandidates(
   if (BasePtr.getBase().isUndef())
     return;
 
-  bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
-                       isa<ConstantFPSDNode>(St->getValue());
-  bool IsExtractVecSrc =
-      (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-       St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
-  bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
+  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+  bool IsLoadSrc = isa<LoadSDNode>(Val);
   BaseIndexOffset LBasePtr;
   // Match on loadbaseptr if relevant.
-  if (IsLoadSrc)
-    LBasePtr = BaseIndexOffset::match(
-        cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
-
+  EVT LoadVT;
+  if (IsLoadSrc) {
+    auto *Ld = cast<LoadSDNode>(Val);
+    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+    LoadVT = Ld->getMemoryVT();
+    // Load and store should be the same type.
+    if (MemVT != LoadVT)
+      return;
+  }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                             int64_t &Offset) -> bool {
     if (Other->isVolatile() || Other->isIndexed())
       return false;
-    // We can merge constant floats to equivalent integers
-    if (Other->getMemoryVT() != MemVT)
-      if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
-            isa<ConstantFPSDNode>(Other->getValue())))
-        return false;
+    SDValue Val = peekThroughBitcast(Other->getValue());
+    // Allow merging constants of different types as integers.
+    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+                                           : Other->getMemoryVT() != MemVT;
     if (IsLoadSrc) {
+      if (NoTypeMatch)
+        return false;
       // The Load's Base Ptr must also match
-      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
+      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
         auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+        if (LoadVT != OtherLd->getMemoryVT())
+          return false;
         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
           return false;
       } else
         return false;
     }
-    if (IsConstantSrc)
-      if (!(isa<ConstantSDNode>(Other->getValue()) ||
-            isa<ConstantFPSDNode>(Other->getValue())))
+    if (IsConstantSrc) {
+      if (NoTypeMatch)
         return false;
-    if (IsExtractVecSrc)
-      if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-            Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+        return false;
+    }
+    if (IsExtractVecSrc) {
+      // Do not merge truncated stores here.
+      if (Other->isTruncatingStore())
         return false;
+      if (!MemVT.bitsEq(Val.getValueType()))
+        return false;
+      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+        return false;
+    }
     Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
+
   // We looking for a root node which is an ancestor to all mergable
   // stores. We search up through a load, to our root and then down
   // through all children. For instance we will find Store{1,2,3} if
@@ -12612,10 +13044,8 @@ void DAGCombiner::getStoreMergeCandidates(
 // indirectly through its operand (we already consider dependencies
 // through the chain). Check in parallel by searching up from
 // non-chain operands of candidates.
-
 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
-
   // FIXME: We should be able to truncate a full search of
   // predecessors by doing a BFS and keeping tabs the originating
   // stores from which worklist nodes come from in a similar way to
@@ -12648,12 +13078,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
     return false;
 
   EVT MemVT = St->getMemoryVT();
-  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+  int64_t ElementSizeBytes = MemVT.getStoreSize();
+  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
 
   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
     return false;
 
-  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
       Attribute::NoImplicitFloat);
 
   // This function cannot currently deal with non-byte-sized memory sizes.
@@ -12665,7 +13096,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
 
   // Perform an early exit check. Do not bother looking at stored values that
   // are not constants, loads, or extracted vector elements.
-  SDValue StoredVal = St->getValue();
+  SDValue StoredVal = peekThroughBitcast(St->getValue());
   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                        isa<ConstantFPSDNode>(StoredVal);
@@ -12675,12 +13106,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
     return false;
 
-  // Don't merge vectors into wider vectors if the source data comes from loads.
-  // TODO: This restriction can be lifted by using logic similar to the
-  // ExtractVecSrc case.
-  if (MemVT.isVector() && IsLoadSrc)
-    return false;
-
   SmallVector<MemOpLink, 8> StoreNodes;
   // Find potential store merge candidates by searching through chain sub-DAG
   getStoreMergeCandidates(St, StoreNodes);
@@ -12759,19 +13184,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       unsigned LastLegalVectorType = 1;
       bool LastIntegerTrunc = false;
       bool NonZero = false;
+      unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
         SDValue StoredVal = ST->getValue();
-
-        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
-          NonZero |= !C->isNullValue();
-        } else if (ConstantFPSDNode *C =
-                       dyn_cast<ConstantFPSDNode>(StoredVal)) {
-          NonZero |= !C->getConstantFPValue()->isNullValue();
-        } else {
-          // Non-constant.
-          break;
+        bool IsElementZero = false;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+          IsElementZero = C->isNullValue();
+        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+          IsElementZero = C->getConstantFPValue()->isNullValue();
+        if (IsElementZero) {
+          if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+            FirstZeroAfterNonZero = i;
         }
+        NonZero |= !IsElementZero;
 
         // Find a legal type for the constant store.
         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
@@ -12791,8 +13217,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
-              TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
-                                     FirstStoreAS, FirstStoreAlign, &IsFast) &&
+              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+                                     FirstStoreAlign, &IsFast) &&
               IsFast) {
             LastIntegerTrunc = true;
             LastLegalType = i + 1;
@@ -12806,13 +13232,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
             !NoVectors) {
           // Find a legal type for the vector store.
-          unsigned Elts = i + 1;
-          if (MemVT.isVector()) {
-            // When merging vector stores, get the total number of elements.
-            Elts *= MemVT.getVectorNumElements();
-          }
+          unsigned Elts = (i + 1) * NumMemElts;
           EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
-          if (TLI.isTypeLegal(Ty) &&
+          if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                      FirstStoreAlign, &IsFast) &&
@@ -12821,23 +13243,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
         }
       }
 
+      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
       // Check if we found a legal integer type that creates a meaningful merge.
-      if (LastLegalType < 2 && LastLegalVectorType < 2) {
-        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+      if (NumElem < 2) {
+        // We know that candidate stores are in order and of correct
+        // shape. While there is no mergeable sequence from the
+        // beginning one may start later in the sequence. The only
+        // reason a merge of size N could have failed where another of
+        // the same size would not have, is if the alignment has
+        // improved or we've dropped a non-zero value. Drop as many
+        // candidates as we can here.
+        unsigned NumSkip = 1;
+        while (
+            (NumSkip < NumConsecutiveStores) &&
+            (NumSkip < FirstZeroAfterNonZero) &&
+            (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
+          NumSkip++;
+        }
+        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
         continue;
       }
 
-      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
-      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
-
       bool Merged = MergeStoresOfConstantsOrVecElts(
           StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
-      if (!Merged) {
-        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
-        continue;
-      }
+      RV |= Merged;
+
       // Remove merged stores for next iteration.
-      RV = true;
       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
       continue;
     }
@@ -12849,25 +13282,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
       unsigned FirstStoreAlign = FirstInChain->getAlignment();
       unsigned NumStoresToMerge = 1;
-      bool IsVec = MemVT.isVector();
       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
-        unsigned StoreValOpcode = St->getValue().getOpcode();
+        SDValue StVal = peekThroughBitcast(St->getValue());
         // This restriction could be loosened.
         // Bail out if any stored values are not elements extracted from a
         // vector. It should be possible to handle mixed sources, but load
         // sources need more careful handling (see the block of code below that
         // handles consecutive loads).
-        if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
-            StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
+        if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+            StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
           return RV;
 
         // Find a legal type for the vector store.
-        unsigned Elts = i + 1;
-        if (IsVec) {
-          // When merging vector stores, get the total number of elements.
-          Elts *= MemVT.getVectorNumElements();
-        }
+        unsigned Elts = (i + 1) * NumMemElts;
         EVT Ty =
             EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
         bool IsFast;
@@ -12879,6 +13307,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
           NumStoresToMerge = i + 1;
       }
 
+      // Check if we found a legal integer type that creates a meaningful merge.
+      if (NumStoresToMerge < 2) {
+        // We know that candidate stores are in order and of correct
+        // shape. While there is no mergeable sequence from the
+        // beginning one may start later in the sequence. The only
+        // reason a merge of size N could have failed where another of
+        // the same size would not have, is if the alignment has
+        // improved. Drop as many candidates as we can here.
+        unsigned NumSkip = 1;
+        while ((NumSkip < NumConsecutiveStores) &&
+               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+          NumSkip++;
+
+        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+        continue;
+      }
+
       bool Merged = MergeStoresOfConstantsOrVecElts(
           StoreNodes, MemVT, NumStoresToMerge, false, true, false);
       if (!Merged) {
@@ -12905,7 +13350,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
     BaseIndexOffset LdBasePtr;
     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
-      LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+      SDValue Val = peekThroughBitcast(St->getValue());
+      LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
       if (!Ld)
         break;
 
@@ -12917,10 +13363,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
       if (Ld->isVolatile() || Ld->isIndexed())
         break;
 
-      // We do not accept ext loads.
-      if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
-        break;
-
       // The stored memory type must be the same.
       if (Ld->getMemoryVT() != MemVT)
         break;
@@ -12986,7 +13428,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
         isDereferenceable = false;
 
       // Find a legal type for the vector store.
-      EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
+      unsigned Elts = (i + 1) * NumMemElts;
+      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
       bool IsFastSt, IsFastLd;
       if (TLI.isTypeLegal(StoreTy) &&
           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
@@ -13023,8 +13467,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
                                StoreTy) &&
             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
-            TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
-                                   FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+                                   FirstStoreAlign, &IsFastSt) &&
             IsFastSt &&
             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                    FirstLoadAlign, &IsFastLd) &&
@@ -13047,7 +13491,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
     NumElem = std::min(LastLegalType, NumElem);
 
     if (NumElem < 2) {
-      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+      // We know that candidate stores are in order and of correct
+      // shape. While there is no mergeable sequence from the
+      // beginning one may start later in the sequence. The only
+      // reason a merge of size N could have failed where another of
+      // the same size would not have is if the alignment or either
+      // the load or store has improved. Drop as many candidates as we
+      // can here.
+      unsigned NumSkip = 1;
+      while ((NumSkip < LoadNodes.size()) &&
+             (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+        NumSkip++;
+      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
       continue;
     }
 
@@ -13055,7 +13511,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
     // to memory.
     EVT JointMemOpVT;
     if (UseVectorTy) {
-      JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
+      // Find a legal type for the vector store.
+      unsigned Elts = NumElem * NumMemElts;
+      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
     } else {
       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
@@ -13104,12 +13562,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
                                     SDValue(NewLoad.getNode(), 1));
     }
 
-    // Replace the all stores with the new store.
-    for (unsigned i = 0; i < NumElem; ++i)
+    // Replace the all stores with the new store. Recursively remove
+    // corresponding value if its no longer used.
+    for (unsigned i = 0; i < NumElem; ++i) {
+      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
       CombineTo(StoreNodes[i].MemNode, NewStore);
+      if (Val.getNode()->use_empty())
+        recursivelyDeleteUnusedNodes(Val.getNode());
+    }
+
     RV = true;
     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
-    continue;
   }
   return RV;
 }
@@ -13284,7 +13747,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     // See if we can simplify the input to this truncstore with knowledge that
     // only the low bits are being used.  For example:
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
-    SDValue Shorter = GetDemandedBits(
+    SDValue Shorter = DAG.GetDemandedBits(
         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                     ST->getMemoryVT().getScalarSizeInBits()));
     AddToWorklist(Value.getNode());
@@ -13356,11 +13819,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
   }
 
-  // Only perform this optimization before the types are legal, because we
-  // don't want to perform this optimization on every DAGCombine invocation.
-  if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
-                                           : !LegalTypes) {
-    for (;;) {
+  // Always perform this optimization before types are legal. If the target
+  // prefers, also try this after legalization to catch stores that were created
+  // by intrinsics or other nodes.
+  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+    while (true) {
       // There can be multiple store sequences on the same chain.
       // Keep trying to merge store sequences until we are unable to do so
       // or until we merge the last store on the chain.
@@ -13499,6 +13962,60 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
   return St1;
 }
 
+/// Convert a disguised subvector insertion into a shuffle:
+/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
+/// bitcast(shuffle (bitcast V), (extended X), Mask)
+/// Note: We do not use an insert_subvector node because that requires a legal
+/// subvector type.
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+  SDValue InsertVal = N->getOperand(1);
+  if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
+      !InsertVal.getOperand(0).getValueType().isVector())
+    return SDValue();
+
+  SDValue SubVec = InsertVal.getOperand(0);
+  SDValue DestVec = N->getOperand(0);
+  EVT SubVecVT = SubVec.getValueType();
+  EVT VT = DestVec.getValueType();
+  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
+  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
+
+  // Step 1: Create a shuffle mask that implements this insert operation. The
+  // vector that we are inserting into will be operand 0 of the shuffle, so
+  // those elements are just 'i'. The inserted subvector is in the first
+  // positions of operand 1 of the shuffle. Example:
+  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
+  SmallVector<int, 16> Mask(NumMaskVals);
+  for (unsigned i = 0; i != NumMaskVals; ++i) {
+    if (i / NumSrcElts == InsIndex)
+      Mask[i] = (i % NumSrcElts) + NumMaskVals;
+    else
+      Mask[i] = i;
+  }
+
+  // Bail out if the target can not handle the shuffle we want to create.
+  EVT SubVecEltVT = SubVecVT.getVectorElementType();
+  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
+  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
+    return SDValue();
+
+  // Step 2: Create a wide vector from the inserted source vector by appending
+  // undefined elements. This is the same size as our destination vector.
+  SDLoc DL(N);
+  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
+  ConcatOps[0] = SubVec;
+  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
+
+  // Step 3: Shuffle in the padded subvector.
+  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
+  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
+  AddToWorklist(PaddedSubV.getNode());
+  AddToWorklist(DestVecBC.getNode());
+  AddToWorklist(Shuf.getNode());
+  return DAG.getBitcast(VT, Shuf);
+}
+
 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   SDValue InVec = N->getOperand(0);
   SDValue InVal = N->getOperand(1);
@@ -13511,10 +14028,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
 
   EVT VT = InVec.getValueType();
 
-  // Check that we know which element is being inserted
-  if (!isa<ConstantSDNode>(EltNo))
+  // Remove redundant insertions:
+  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
+  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
+    return InVec;
+
+  // We must know which element is being inserted for folds below here.
+  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+  if (!IndexC)
     return SDValue();
-  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+  unsigned Elt = IndexC->getZExtValue();
+
+  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+    return Shuf;
 
   // Canonicalize insert_vector_elt dag nodes.
   // Example:
@@ -13692,9 +14219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     // converts.
   }
 
-  // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
+  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
+  bool isLE = DAG.getDataLayout().isLittleEndian();
+  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
-      ConstEltNo->isNullValue() && VT.isInteger()) {
+      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
     SDValue BCSrc = InVec.getOperand(0);
     if (BCSrc.getValueType().isScalarInteger())
       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
@@ -13748,7 +14277,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     // FIXME: We should handle recursing on other vector shuffles and
     // scalar_to_vector here as well.
 
-    if (!LegalOperations) {
+    if (!LegalOperations ||
+        // FIXME: Should really be just isOperationLegalOrCustom.
+        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
+        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
@@ -14054,10 +14586,18 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
   EVT InVT1 = VecIn1.getValueType();
   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
 
-  unsigned Vec2Offset = InVT1.getVectorNumElements();
+  unsigned Vec2Offset = 0;
   unsigned NumElems = VT.getVectorNumElements();
   unsigned ShuffleNumElems = NumElems;
 
+  // In case both the input vectors are extracted from same base
+  // vector we do not need extra addend (Vec2Offset) while
+  // computing shuffle mask.
+  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
+      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
+      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
+    Vec2Offset = InVT1.getVectorNumElements();
+
   // We can't generate a shuffle node with mismatched input and output types.
   // Try to make the types match the type of the output.
   if (InVT1 != VT || InVT2 != VT) {
@@ -14072,7 +14612,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
       VecIn2 = SDValue();
     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
-      if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
         return SDValue();
 
       if (!VecIn2.getNode()) {
@@ -14204,7 +14744,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
         !isa<ConstantSDNode>(Op.getOperand(1)))
       return SDValue();
-
     SDValue ExtractedFromVec = Op.getOperand(0);
 
     // All inputs must have the same element type as the output.
@@ -14227,6 +14766,50 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   if (VecIn.size() < 2)
     return SDValue();
 
+  // If all the Operands of BUILD_VECTOR extract from same
+  // vector, then split the vector efficiently based on the maximum
+  // vector access index and adjust the VectorMask and
+  // VecIn accordingly.
+  if (VecIn.size() == 2) {
+    unsigned MaxIndex = 0;
+    unsigned NearestPow2 = 0;
+    SDValue Vec = VecIn.back();
+    EVT InVT = Vec.getValueType();
+    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+    SmallVector<unsigned, 8> IndexVec(NumElems, 0);
+
+    for (unsigned i = 0; i < NumElems; i++) {
+      if (VectorMask[i] <= 0)
+        continue;
+      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
+      IndexVec[i] = Index;
+      MaxIndex = std::max(MaxIndex, Index);
+    }
+
+    NearestPow2 = PowerOf2Ceil(MaxIndex);
+    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
+        NumElems * 2 < NearestPow2) {
+      unsigned SplitSize = NearestPow2 / 2;
+      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+                                     InVT.getVectorElementType(), SplitSize);
+      if (TLI.isTypeLegal(SplitVT)) {
+        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+                                     DAG.getConstant(SplitSize, DL, IdxTy));
+        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+                                     DAG.getConstant(0, DL, IdxTy));
+        VecIn.pop_back();
+        VecIn.push_back(VecIn1);
+        VecIn.push_back(VecIn2);
+
+        for (unsigned i = 0; i < NumElems; i++) {
+          if (VectorMask[i] <= 0)
+            continue;
+          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
+        }
+      }
+    }
+  }
+
   // TODO: We want to sort the vectors by descending length, so that adjacent
   // pairs have similar length, and the longer vector is always first in the
   // pair.
@@ -14315,77 +14898,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
     }
   }
-
   return Shuffles[0];
 }
 
-// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
-// operations which can be matched to a truncate.
-SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
-  // TODO: Add support for big-endian.
-  if (DAG.getDataLayout().isBigEndian())
-    return SDValue();
-  if (N->getNumOperands() < 2)
-    return SDValue();
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-  unsigned NumElems = N->getNumOperands();
-
-  if (!isTypeLegal(VT))
-    return SDValue();
-
-  // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
-  // index, bail out.
-  // TODO: Allow undef elements in some cases?
-  if (any_of(N->ops(), [VT](SDValue Op) {
-        return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-               !isa<ConstantSDNode>(Op.getOperand(1)) ||
-               Op.getValueType() != VT.getVectorElementType();
-      }))
-    return SDValue();
-
-  // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
-  auto GetExtractIdx = [](SDValue Extract) {
-    return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
-  };
-
-  // The first BUILD_VECTOR operand must be an an extract from index zero
-  // (assuming no undef and little-endian).
-  if (GetExtractIdx(N->getOperand(0)) != 0)
-    return SDValue();
-
-  // Compute the stride from the first index.
-  int Stride = GetExtractIdx(N->getOperand(1));
-  SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
-
-  // Proceed only if the stride and the types can be matched to a truncate.
-  if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
-      (ExtractedFromVec.getValueType().getVectorNumElements() !=
-       Stride * NumElems) ||
-      (VT.getScalarSizeInBits() * Stride > 64))
-    return SDValue();
-
-  // Check remaining operands are consistent with the computed stride.
-  for (unsigned i = 1; i != NumElems; ++i) {
-    SDValue Op = N->getOperand(i);
-
-    if ((Op.getOperand(0) != ExtractedFromVec) ||
-        (GetExtractIdx(Op) != Stride * i))
-      return SDValue();
-  }
-
-  // All checks were ok, construct the truncate.
-  LLVMContext &Ctx = *DAG.getContext();
-  EVT NewVT = VT.getVectorVT(
-      Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
-  EVT TruncVT =
-      VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
-
-  SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
-  Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
-  return DAG.getBitcast(VT, Res);
-}
-
 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   EVT VT = N->getValueType(0);
 
@@ -14428,10 +14943,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
     return V;
 
-  if (TLI.isDesirableToCombineBuildVectorToTruncate())
-    if (SDValue V = reduceBuildVecToTrunc(N))
-      return V;
-
   if (SDValue V = reduceBuildVecToShuffle(N))
     return V;
 
@@ -14514,8 +15025,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
 
   for (SDValue Op : N->ops()) {
     // Peek through any bitcast.
-    while (Op.getOpcode() == ISD::BITCAST)
-      Op = Op.getOperand(0);
+    Op = peekThroughBitcast(Op);
 
     // UNDEF nodes convert to UNDEF shuffle mask values.
     if (Op.isUndef()) {
@@ -14534,8 +15044,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
     EVT ExtVT = ExtVec.getValueType();
 
     // Peek through any bitcast.
-    while (ExtVec.getOpcode() == ISD::BITCAST)
-      ExtVec = ExtVec.getOperand(0);
+    ExtVec = peekThroughBitcast(ExtVec);
 
     // UNDEF nodes convert to UNDEF shuffle mask values.
     if (ExtVec.isUndef()) {
@@ -14760,9 +15269,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
 
   // We are looking for an optionally bitcasted wide vector binary operator
   // feeding an extract subvector.
-  SDValue BinOp = Extract->getOperand(0);
-  if (BinOp.getOpcode() == ISD::BITCAST)
-    BinOp = BinOp.getOperand(0);
+  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
 
   // TODO: The motivating case for this transform is an x86 AVX1 target. That
   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
@@ -14786,13 +15293,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
     return SDValue();
 
   // Peek through bitcasts of the binary operator operands if needed.
-  SDValue LHS = BinOp.getOperand(0);
-  if (LHS.getOpcode() == ISD::BITCAST)
-    LHS = LHS.getOperand(0);
-
-  SDValue RHS = BinOp.getOperand(1);
-  if (RHS.getOpcode() == ISD::BITCAST)
-    RHS = RHS.getOperand(0);
+  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
+  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
 
   // We need at least one concatenation operation of a binop operand to make
   // this transform worthwhile. The concat must double the input vector sizes.
@@ -14891,8 +15393,34 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   }
 
   // Skip bitcasting
-  if (V->getOpcode() == ISD::BITCAST)
-    V = V.getOperand(0);
+  V = peekThroughBitcast(V);
+
+  // If the input is a build vector. Try to make a smaller build vector.
+  if (V->getOpcode() == ISD::BUILD_VECTOR) {
+    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+      EVT InVT = V->getValueType(0);
+      unsigned ExtractSize = NVT.getSizeInBits();
+      unsigned EltSize = InVT.getScalarSizeInBits();
+      // Only do this if we won't split any elements.
+      if (ExtractSize % EltSize == 0) {
+        unsigned NumElems = ExtractSize / EltSize;
+        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+                                         InVT.getVectorElementType(), NumElems);
+        if ((!LegalOperations ||
+             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
+            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+          unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
+                            EltSize;
+
+          // Extract the pieces from the original build_vector.
+          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+                                            makeArrayRef(V->op_begin() + IdxVal,
+                                                         NumElems));
+          return DAG.getBitcast(NVT, BuildVec);
+        }
+      }
+    }
+  }
 
   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
     // Handle only simple case where vector being inserted and vector
@@ -15013,6 +15541,37 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
 }
 
+static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
+                                   SDValue N1, SelectionDAG &DAG) {
+  auto isUndefElt = [](SDValue V, int Idx) {
+    // TODO - handle more cases as required.
+    if (V.getOpcode() == ISD::BUILD_VECTOR)
+      return V.getOperand(Idx).isUndef();
+    if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+      return (Idx != 0) || V.getOperand(0).isUndef();
+    return false;
+  };
+
+  EVT VT = SVN->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  bool Changed = false;
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = SVN->getMaskElt(i);
+    if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
+        ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
+      Changed = true;
+      Idx = -1;
+    }
+    NewMask.push_back(Idx);
+  }
+  if (Changed)
+    return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+
+  return SDValue();
+}
+
 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
 // or turn a shuffle of a single concat into simpler shuffle then concat.
 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -15091,7 +15650,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
 //
 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
 // We don't fold shuffles where one side is a non-zero constant, and we don't
-// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
 // non-constant operands. This seems to work out reasonably well in practice.
 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG,
@@ -15103,6 +15662,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
 
   if (!N0->hasOneUse() || !N1->hasOneUse())
     return SDValue();
+
   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
   // discussed above.
   if (!N1.isUndef()) {
@@ -15114,6 +15674,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       return SDValue();
   }
 
+  // If both inputs are splats of the same value then we can safely merge this
+  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+  bool IsSplat = false;
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (BV0 && BV1)
+    if (SDValue Splat0 = BV0->getSplatValue())
+      IsSplat = (Splat0 == BV1->getSplatValue());
+
   SmallVector<SDValue, 8> Ops;
   SmallSet<SDValue, 16> DuplicateOps;
   for (int M : SVN->getMask()) {
@@ -15124,23 +15693,25 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       if (S.getOpcode() == ISD::BUILD_VECTOR) {
         Op = S.getOperand(Idx);
       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
-        if (Idx == 0)
-          Op = S.getOperand(0);
+        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
+        Op = S.getOperand(0);
       } else {
         // Operand can't be combined - bail out.
         return SDValue();
       }
     }
 
-    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
-    // fine, but it's likely to generate low-quality code if the target can't
-    // reconstruct an appropriate shuffle.
+    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+    // generating a splat; semantically, this is fine, but it's likely to
+    // generate low-quality code if the target can't reconstruct an appropriate
+    // shuffle.
     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
-      if (!DuplicateOps.insert(Op).second)
+      if (!IsSplat && !DuplicateOps.insert(Op).second)
         return SDValue();
 
     Ops.push_back(Op);
   }
+
   // BUILD_VECTOR requires all inputs to be of the same type, find the
   // maximum type and extend them all.
   EVT SVT = VT.getScalarType();
@@ -15162,7 +15733,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                             SelectionDAG &DAG,
                                             const TargetLowering &TLI,
-                                            bool LegalOperations) {
+                                            bool LegalOperations,
+                                            bool LegalTypes) {
   EVT VT = SVN->getValueType(0);
   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
 
@@ -15190,14 +15762,18 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
   // power-of-2 extensions as they are the most likely.
   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+    // Check for non power of 2 vector sizes
+    if (NumElts % Scale != 0)
+      continue;
     if (!isAnyExtend(Scale))
       continue;
 
     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
-    if (!LegalOperations ||
-        TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
-      return DAG.getBitcast(VT,
+    if (!LegalTypes || TLI.isTypeLegal(OutVT))
+      if (!LegalOperations ||
+          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+        return DAG.getBitcast(VT,
                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
   }
 
@@ -15218,9 +15794,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
   if (!VT.isInteger() || IsBigEndian)
     return SDValue();
 
-  SDValue N0 = SVN->getOperand(0);
-  while (N0.getOpcode() == ISD::BITCAST)
-    N0 = N0.getOperand(0);
+  SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
 
   unsigned Opcode = N0.getOpcode();
   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
@@ -15316,6 +15890,84 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
                               NewMask);
 }
 
+/// If the shuffle mask is taking exactly one element from the first vector
+/// operand and passing through all other elements from the second vector
+/// operand, return the index of the mask element that is choosing an element
+/// from the first operand. Otherwise, return -1.
+static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
+  int MaskSize = Mask.size();
+  int EltFromOp0 = -1;
+  // TODO: This does not match if there are undef elements in the shuffle mask.
+  // Should we ignore undefs in the shuffle mask instead? The trade-off is
+  // removing an instruction (a shuffle), but losing the knowledge that some
+  // vector lanes are not needed.
+  for (int i = 0; i != MaskSize; ++i) {
+    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
+      // We're looking for a shuffle of exactly one element from operand 0.
+      if (EltFromOp0 != -1)
+        return -1;
+      EltFromOp0 = i;
+    } else if (Mask[i] != i + MaskSize) {
+      // Nothing from operand 1 can change lanes.
+      return -1;
+    }
+  }
+  return EltFromOp0;
+}
+
+/// If a shuffle inserts exactly one element from a source vector operand into
+/// another vector operand and we can access the specified element as a scalar,
+/// then we can eliminate the shuffle.
+static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
+                                      SelectionDAG &DAG) {
+  // First, check if we are taking one element of a vector and shuffling that
+  // element into another vector.
+  ArrayRef<int> Mask = Shuf->getMask();
+  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
+  SDValue Op0 = Shuf->getOperand(0);
+  SDValue Op1 = Shuf->getOperand(1);
+  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
+  if (ShufOp0Index == -1) {
+    // Commute mask and check again.
+    ShuffleVectorSDNode::commuteMask(CommutedMask);
+    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
+    if (ShufOp0Index == -1)
+      return SDValue();
+    // Commute operands to match the commuted shuffle mask.
+    std::swap(Op0, Op1);
+    Mask = CommutedMask;
+  }
+
+  // The shuffle inserts exactly one element from operand 0 into operand 1.
+  // Now see if we can access that element as a scalar via a real insert element
+  // instruction.
+  // TODO: We can try harder to locate the element as a scalar. Examples: it
+  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
+         "Shuffle mask value must be from operand 0");
+  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
+    return SDValue();
+
+  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
+  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
+    return SDValue();
+
+  // There's an existing insertelement with constant insertion index, so we
+  // don't need to check the legality/profitability of a replacement operation
+  // that differs at most in the constant value. The target should be able to
+  // lower any of those in a similar way. If not, legalization will expand this
+  // to a scalar-to-vector plus shuffle.
+  //
+  // Note that the shuffle may move the scalar from the position that the insert
+  // element used. Therefore, our new insert element occurs at the shuffle's
+  // mask index value, not the insert's index value.
+  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
+  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
+                                        Op0.getOperand(2).getValueType());
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+                     Op1, Op0.getOperand(1), NewInsIndex);
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -15362,6 +16014,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   }
 
+  // Simplify shuffle mask if a referenced element is UNDEF.
+  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
+    return V;
+
+  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+    return InsElt;
+
   // A shuffle of a single vector that is a splat can always be folded.
   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
     if (N1->isUndef() && N0Shuf->isSplat())
@@ -15426,7 +16085,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     return S;
 
   // Match shuffles that can be converted to any_vector_extend_in_reg.
-  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
     return V;
 
   // Combine "truncate_vector_in_reg" style shuffles.
@@ -15486,7 +16145,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       if (TLI.isTypeLegal(ScaleVT) &&
           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
-
         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
 
@@ -15661,23 +16319,46 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
-  // with a VECTOR_SHUFFLE.
+  // with a VECTOR_SHUFFLE and possible truncate.
   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
     SDValue InVec = InVal->getOperand(0);
     SDValue EltNo = InVal->getOperand(1);
-
-    // FIXME: We could support implicit truncation if the shuffle can be
-    // scaled to a smaller vector scalar type.
-    ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
-    if (C0 && VT == InVec.getValueType() &&
-        VT.getScalarType() == InVal.getValueType()) {
-      SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+    auto InVecT = InVec.getValueType();
+    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
+      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
       int Elt = C0->getZExtValue();
       NewMask[0] = Elt;
-
-      if (TLI.isShuffleMaskLegal(NewMask, VT))
-        return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
-                                    NewMask);
+      SDValue Val;
+      // If we have an implict truncate do truncate here as long as it's legal.
+      // if it's not legal, this should
+      if (VT.getScalarType() != InVal.getValueType() &&
+          InVal.getValueType().isScalarInteger() &&
+          isTypeLegal(VT.getScalarType())) {
+        Val =
+            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+      }
+      if (VT.getScalarType() == InVecT.getScalarType() &&
+          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
+          TLI.isShuffleMaskLegal(NewMask, VT)) {
+        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
+                                   DAG.getUNDEF(InVecT), NewMask);
+        // If the initial vector is the correct size this shuffle is a
+        // valid result.
+        if (VT == InVecT)
+          return Val;
+        // If not we must truncate the vector.
+        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+          EVT SubVT =
+              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+                               VT.getVectorNumElements());
+          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
+                            ZeroIdx);
+          return Val;
+        }
+      }
     }
   }
 
@@ -15694,12 +16375,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   if (N1.isUndef())
     return N0;
 
+  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
+  // us to pull BITCASTs from input to output.
+  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
+    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
+
   // If this is an insert of an extracted vector into an undef vector, we can
   // just use the input to the extract.
   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
     return N1.getOperand(0);
 
+  // If we are inserting a bitcast value into an undef, with the same
+  // number of elements, just use the bitcast input of the extract.
+  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      N1.getOperand(0).getOperand(1) == N2 &&
+      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+          VT.getVectorNumElements()) {
+    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+  }
+
+  // If both N1 and N2 are bitcast values on which insert_subvector
+  // would makes sense, pull the bitcast through.
+  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+    SDValue CN0 = N0.getOperand(0);
+    SDValue CN1 = N1.getOperand(0);
+    if (CN0.getValueType().getVectorElementType() ==
+            CN1.getValueType().getVectorElementType() &&
+        CN0.getValueType().getVectorNumElements() ==
+            VT.getVectorNumElements()) {
+      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+                                      CN0.getValueType(), CN0, CN1, N2);
+      return DAG.getBitcast(VT, NewINSERT);
+    }
+  }
+
   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -15779,7 +16495,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
+  SDValue RHS = peekThroughBitcast(N->getOperand(1));
   SDLoc DL(N);
 
   // Make sure we're not running after operation legalization where it
@@ -15790,9 +16506,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   if (N->getOpcode() != ISD::AND)
     return SDValue();
 
-  if (RHS.getOpcode() == ISD::BITCAST)
-    RHS = RHS.getOperand(0);
-
   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
     return SDValue();
 
@@ -15945,7 +16658,6 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
 /// the DAG combiner loop to avoid it being looked at.
 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                     SDValue RHS) {
-
   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
@@ -16418,7 +17130,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   // when optimising for minimum size, we don't want to expand a div to a mul
   // and a shift.
-  if (DAG.getMachineFunction().getFunction()->optForMinSize())
+  if (DAG.getMachineFunction().getFunction().optForMinSize())
     return SDValue();
 
   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -16429,7 +17141,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   if (C->isNullValue())
     return SDValue();
 
-  std::vector<SDNode*> Built;
+  std::vector<SDNode *> Built;
   SDValue S =
       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
 
@@ -16464,7 +17176,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   // when optimising for minimum size, we don't want to expand a div to a mul
   // and a shift.
-  if (DAG.getMachineFunction().getFunction()->optForMinSize())
+  if (DAG.getMachineFunction().getFunction().optForMinSize())
     return SDValue();
 
   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -16475,7 +17187,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   if (C->isNullValue())
     return SDValue();
 
-  std::vector<SDNode*> Built;
+  std::vector<SDNode *> Built;
   SDValue S =
       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
 
@@ -16760,8 +17472,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
   if (Op1->isInvariant() && Op0->writeMem())
     return false;
 
-  unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
-  unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
+  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
+  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
 
   // Check for BaseIndexOffset matching.
   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
@@ -16957,7 +17669,11 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
 /// (aliasing node.)
 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
-  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
+  if (OptLevel == CodeGenOpt::None)
+    return OldChain;
+
+  // Ops for replacing token factor.
+  SmallVector<SDValue, 8> Aliases;
 
   // Accumulate all the aliases to this node.
   GatherAllAliases(N, OldChain, Aliases);
@@ -16987,6 +17703,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
 // to go from a partially-merged state to the desired final
 // fully-merged state.
 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+  if (OptLevel == CodeGenOpt::None)
+    return false;
+
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b2599b2e17f1..d3c94b5f9e6b 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -63,6 +63,9 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -98,11 +101,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -168,8 +168,7 @@ bool FastISel::hasTrivialKill(const Value *V) {
 
   // No-op casts are trivially coalesced by fast-isel.
   if (const auto *Cast = dyn_cast<CastInst>(I))
-    if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
-        !hasTrivialKill(Cast->getOperand(0)))
+    if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
       return false;
 
   // Even the value might have only one use in the LLVM IR, it is possible that
@@ -1133,6 +1132,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
   case Intrinsic::lifetime_end:
   // The donothing intrinsic does, well, nothing.
   case Intrinsic::donothing:
+  // Neither does the sideeffect intrinsic.
+  case Intrinsic::sideeffect:
   // Neither does the assume intrinsic; it's also OK not to codegen its operand.
   case Intrinsic::assume:
     return true;
@@ -1187,7 +1188,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
         // into an indirect DBG_VALUE.
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
-                Op->getReg(), 0, DI->getVariable(), DI->getExpression());
+                Op->getReg(), DI->getVariable(), DI->getExpression());
       } else
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::DBG_VALUE))
@@ -1212,35 +1213,32 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     if (!V) {
       // Currently the optimizer can produce this; insert an undef to
       // help debugging.  Probably the optimizer should not do this.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
-          .addReg(0U)
-          .addImm(DI->getOffset())
-          .addMetadata(DI->getVariable())
-          .addMetadata(DI->getExpression());
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
+              DI->getVariable(), DI->getExpression());
     } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
       if (CI->getBitWidth() > 64)
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
             .addCImm(CI)
-            .addImm(DI->getOffset())
+            .addImm(0U)
             .addMetadata(DI->getVariable())
             .addMetadata(DI->getExpression());
       else
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
             .addImm(CI->getZExtValue())
-            .addImm(DI->getOffset())
+            .addImm(0U)
             .addMetadata(DI->getVariable())
             .addMetadata(DI->getExpression());
     } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
           .addFPImm(CF)
-          .addImm(DI->getOffset())
+          .addImm(0U)
           .addMetadata(DI->getVariable())
           .addMetadata(DI->getExpression());
     } else if (unsigned Reg = lookUpRegForValue(V)) {
       // FIXME: This does not handle register-indirect values at offset 0.
-      bool IsIndirect = DI->getOffset() != 0;
+      bool IsIndirect = false;
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
-              DI->getOffset(), DI->getVariable(), DI->getExpression());
+              DI->getVariable(), DI->getExpression());
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b736037d71dd..c7cdb49203b1 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -17,11 +17,14 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
@@ -32,12 +35,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index b96c96f0b4df..cc9b41b4b487 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -21,14 +21,14 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "instr-emitter"
@@ -673,7 +673,6 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
 MachineInstr *
 InstrEmitter::EmitDbgValue(SDDbgValue *SD,
                            DenseMap<SDValue, unsigned> &VRBaseMap) {
-  uint64_t Offset = SD->getOffset();
   MDNode *Var = SD->getVariable();
   MDNode *Expr = SD->getExpression();
   DebugLoc DL = SD->getDebugLoc();
@@ -685,7 +684,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
     // EmitTargetCodeForFrameDebugValue is responsible for allocation.
     return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
         .addFrameIndex(SD->getFrameIx())
-        .addImm(Offset)
+        .addImm(0)
         .addMetadata(Var)
         .addMetadata(Expr);
   }
@@ -727,11 +726,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
 
   // Indirect addressing is indicated by an Imm as the second parameter.
   if (SD->isIndirect())
-    MIB.addImm(Offset);
-  else {
-    assert(Offset == 0 && "direct value cannot have an offset");
+    MIB.addImm(0U);
+  else
     MIB.addReg(0U, RegState::Debug);
-  }
 
   MIB.addMetadata(Var);
   MIB.addMetadata(Expr);
@@ -938,10 +935,14 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
     break;
   }
-  case ISD::EH_LABEL: {
-    MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+  case ISD::EH_LABEL:
+  case ISD::ANNOTATION_LABEL: {
+    unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL)
+                       ? TargetOpcode::EH_LABEL
+                       : TargetOpcode::ANNOTATION_LABEL;
+    MCSymbol *S = cast<LabelSDNode>(Node)->getLabel();
     BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
-            TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+            TII->get(Opc)).addSym(S);
     break;
   }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7e4bc3ccb5d3..bb1dc17b7a1b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1,4 +1,4 @@
-//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,37 +11,65 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "legalizedag"
 
 namespace {
 
-struct FloatSignAsInt;
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+  EVT FloatVT;
+  SDValue Chain;
+  SDValue FloatPtr;
+  SDValue IntPtr;
+  MachinePointerInfo IntPointerInfo;
+  MachinePointerInfo FloatPointerInfo;
+  SDValue IntValue;
+  APInt SignMask;
+  uint8_t SignBit;
+};
 
 //===----------------------------------------------------------------------===//
 /// This takes an arbitrary SelectionDAG as input and
@@ -54,7 +82,6 @@ struct FloatSignAsInt;
 /// as part of its processing.  For example, if a target does not support a
 /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
 /// will attempt merge setcc and brc instructions into brcc's.
-///
 class SelectionDAGLegalize {
   const TargetMachine &TM;
   const TargetLowering &TLI;
@@ -165,11 +192,13 @@ private:
 
 public:
   // Node replacement helpers
+
   void ReplacedNode(SDNode *N) {
     LegalizedNodes.erase(N);
     if (UpdatedNodes)
       UpdatedNodes->insert(N);
   }
+
   void ReplaceNode(SDNode *Old, SDNode *New) {
     DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
           dbgs() << "     with:      "; New->dump(&DAG));
@@ -182,6 +211,7 @@ public:
       UpdatedNodes->insert(New);
     ReplacedNode(Old);
   }
+
   void ReplaceNode(SDValue Old, SDValue New) {
     DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
           dbgs() << "     with:      "; New->dump(&DAG));
@@ -191,6 +221,7 @@ public:
       UpdatedNodes->insert(New.getNode());
     ReplacedNode(Old.getNode());
   }
+
   void ReplaceNode(SDNode *Old, const SDValue *New) {
     DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
 
@@ -205,7 +236,8 @@ public:
     ReplacedNode(Old);
   }
 };
-}
+
+} // end anonymous namespace
 
 /// Return a vector shuffle operation which
 /// performs the same shuffe in terms of order or result bytes, but on a type
@@ -376,6 +408,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
 }
 
 SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+  DEBUG(dbgs() << "Optimizing float store operations\n");
   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   // FIXME: We shouldn't do this for TargetConstantFP's.
   // FIXME: move this to the DAG Combiner!  Note that we can't regress due
@@ -434,172 +467,184 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
 }
 
 void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
-    StoreSDNode *ST = cast<StoreSDNode>(Node);
-    SDValue Chain = ST->getChain();
-    SDValue Ptr = ST->getBasePtr();
-    SDLoc dl(Node);
-
-    unsigned Alignment = ST->getAlignment();
-    MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
-    AAMDNodes AAInfo = ST->getAAInfo();
-
-    if (!ST->isTruncatingStore()) {
-      if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
-        ReplaceNode(ST, OptStore);
-        return;
-      }
+  StoreSDNode *ST = cast<StoreSDNode>(Node);
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+  SDLoc dl(Node);
 
-      {
-        SDValue Value = ST->getValue();
-        MVT VT = Value.getSimpleValueType();
-        switch (TLI.getOperationAction(ISD::STORE, VT)) {
-        default: llvm_unreachable("This action is not supported yet!");
-        case TargetLowering::Legal: {
-          // If this is an unaligned store and the target doesn't support it,
-          // expand it.
-          EVT MemVT = ST->getMemoryVT();
-          unsigned AS = ST->getAddressSpace();
-          unsigned Align = ST->getAlignment();
-          const DataLayout &DL = DAG.getDataLayout();
-          if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
-            SDValue Result = TLI.expandUnalignedStore(ST, DAG);
-            ReplaceNode(SDValue(ST, 0), Result);
-          }
-          break;
-        }
-        case TargetLowering::Custom: {
-          SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
-          if (Res && Res != SDValue(Node, 0))
-            ReplaceNode(SDValue(Node, 0), Res);
-          return;
-        }
-        case TargetLowering::Promote: {
-          MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
-          assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-                 "Can only promote stores to same size type");
-          Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
-          SDValue Result =
-              DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
-                           Alignment, MMOFlags, AAInfo);
-          ReplaceNode(SDValue(Node, 0), Result);
-          break;
-        }
-        }
-        return;
-      }
+  unsigned Alignment = ST->getAlignment();
+  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = ST->getAAInfo();
+
+  if (!ST->isTruncatingStore()) {
+    DEBUG(dbgs() << "Legalizing store operation\n");
+    if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+      ReplaceNode(ST, OptStore);
+      return;
+    }
+
+    SDValue Value = ST->getValue();
+    MVT VT = Value.getSimpleValueType();
+    switch (TLI.getOperationAction(ISD::STORE, VT)) {
+    default: llvm_unreachable("This action is not supported yet!");
+    case TargetLowering::Legal: {
+      // If this is an unaligned store and the target doesn't support it,
+      // expand it.
+      EVT MemVT = ST->getMemoryVT();
+      unsigned AS = ST->getAddressSpace();
+      unsigned Align = ST->getAlignment();
+      const DataLayout &DL = DAG.getDataLayout();
+      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+        DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+        SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+        ReplaceNode(SDValue(ST, 0), Result);
+      } else
+        DEBUG(dbgs() << "Legal store\n");
+      break;
+    }
+    case TargetLowering::Custom: {
+      DEBUG(dbgs() << "Trying custom lowering\n");
+      SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+      if (Res && Res != SDValue(Node, 0))
+        ReplaceNode(SDValue(Node, 0), Res);
+      return;
+    }
+    case TargetLowering::Promote: {
+      MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+      assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+             "Can only promote stores to same size type");
+      Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+      SDValue Result =
+          DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+                       Alignment, MMOFlags, AAInfo);
+      ReplaceNode(SDValue(Node, 0), Result);
+      break;
+    }
+    }
+    return;
+  }
+
+  DEBUG(dbgs() << "Legalizing truncating store operations\n");
+  SDValue Value = ST->getValue();
+  EVT StVT = ST->getMemoryVT();
+  unsigned StWidth = StVT.getSizeInBits();
+  auto &DL = DAG.getDataLayout();
+
+  if (StWidth != StVT.getStoreSizeInBits()) {
+    // Promote to a byte-sized store with upper bits zero if not
+    // storing an integral number of bytes.  For example, promote
+    // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+    EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                                StVT.getStoreSizeInBits());
+    Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+    SDValue Result =
+        DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+                          Alignment, MMOFlags, AAInfo);
+    ReplaceNode(SDValue(Node, 0), Result);
+  } else if (StWidth & (StWidth - 1)) {
+    // If not storing a power-of-2 number of bits, expand as two stores.
+    assert(!StVT.isVector() && "Unsupported truncstore!");
+    unsigned RoundWidth = 1 << Log2_32(StWidth);
+    assert(RoundWidth < StWidth);
+    unsigned ExtraWidth = StWidth - RoundWidth;
+    assert(ExtraWidth < RoundWidth);
+    assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+           "Store size not an integral number of bytes!");
+    EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+    EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+    SDValue Lo, Hi;
+    unsigned IncrementSize;
+
+    if (DL.isLittleEndian()) {
+      // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+      // Store the bottom RoundWidth bits.
+      Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+                             RoundVT, Alignment, MMOFlags, AAInfo);
+
+      // Store the remaining ExtraWidth bits.
+      IncrementSize = RoundWidth / 8;
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                        DAG.getConstant(IncrementSize, dl,
+                                        Ptr.getValueType()));
+      Hi = DAG.getNode(
+          ISD::SRL, dl, Value.getValueType(), Value,
+          DAG.getConstant(RoundWidth, dl,
+                          TLI.getShiftAmountTy(Value.getValueType(), DL)));
+      Hi = DAG.getTruncStore(
+          Chain, dl, Hi, Ptr,
+          ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+          MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
     } else {
-      SDValue Value = ST->getValue();
-
-      EVT StVT = ST->getMemoryVT();
-      unsigned StWidth = StVT.getSizeInBits();
-      auto &DL = DAG.getDataLayout();
-
-      if (StWidth != StVT.getStoreSizeInBits()) {
-        // Promote to a byte-sized store with upper bits zero if not
-        // storing an integral number of bytes.  For example, promote
-        // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
-        EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
-                                    StVT.getStoreSizeInBits());
-        Value = DAG.getZeroExtendInReg(Value, dl, StVT);
-        SDValue Result =
-            DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
-                              Alignment, MMOFlags, AAInfo);
-        ReplaceNode(SDValue(Node, 0), Result);
-      } else if (StWidth & (StWidth - 1)) {
-        // If not storing a power-of-2 number of bits, expand as two stores.
-        assert(!StVT.isVector() && "Unsupported truncstore!");
-        unsigned RoundWidth = 1 << Log2_32(StWidth);
-        assert(RoundWidth < StWidth);
-        unsigned ExtraWidth = StWidth - RoundWidth;
-        assert(ExtraWidth < RoundWidth);
-        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
-               "Store size not an integral number of bytes!");
-        EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
-        EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
-        SDValue Lo, Hi;
-        unsigned IncrementSize;
-
-        if (DL.isLittleEndian()) {
-          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
-          // Store the bottom RoundWidth bits.
-          Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
-                                 RoundVT, Alignment, MMOFlags, AAInfo);
-
-          // Store the remaining ExtraWidth bits.
-          IncrementSize = RoundWidth / 8;
-          Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                            DAG.getConstant(IncrementSize, dl,
-                                            Ptr.getValueType()));
-          Hi = DAG.getNode(
-              ISD::SRL, dl, Value.getValueType(), Value,
-              DAG.getConstant(RoundWidth, dl,
-                              TLI.getShiftAmountTy(Value.getValueType(), DL)));
-          Hi = DAG.getTruncStore(
-              Chain, dl, Hi, Ptr,
-              ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
-              MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
-        } else {
-          // Big endian - avoid unaligned stores.
-          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
-          // Store the top RoundWidth bits.
-          Hi = DAG.getNode(
-              ISD::SRL, dl, Value.getValueType(), Value,
-              DAG.getConstant(ExtraWidth, dl,
-                              TLI.getShiftAmountTy(Value.getValueType(), DL)));
-          Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
-                                 RoundVT, Alignment, MMOFlags, AAInfo);
-
-          // Store the remaining ExtraWidth bits.
-          IncrementSize = RoundWidth / 8;
-          Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                            DAG.getConstant(IncrementSize, dl,
-                                            Ptr.getValueType()));
-          Lo = DAG.getTruncStore(
-              Chain, dl, Value, Ptr,
-              ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
-              MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
-        }
+      // Big endian - avoid unaligned stores.
+      // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+      // Store the top RoundWidth bits.
+      Hi = DAG.getNode(
+          ISD::SRL, dl, Value.getValueType(), Value,
+          DAG.getConstant(ExtraWidth, dl,
+                          TLI.getShiftAmountTy(Value.getValueType(), DL)));
+      Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+                             RoundVT, Alignment, MMOFlags, AAInfo);
 
-        // The order of the stores doesn't matter.
-        SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
-        ReplaceNode(SDValue(Node, 0), Result);
+      // Store the remaining ExtraWidth bits.
+      IncrementSize = RoundWidth / 8;
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                        DAG.getConstant(IncrementSize, dl,
+                                        Ptr.getValueType()));
+      Lo = DAG.getTruncStore(
+          Chain, dl, Value, Ptr,
+          ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+          MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+    }
+
+    // The order of the stores doesn't matter.
+    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+    ReplaceNode(SDValue(Node, 0), Result);
+  } else {
+    switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+    default: llvm_unreachable("This action is not supported yet!");
+    case TargetLowering::Legal: {
+      EVT MemVT = ST->getMemoryVT();
+      unsigned AS = ST->getAddressSpace();
+      unsigned Align = ST->getAlignment();
+      // If this is an unaligned store and the target doesn't support it,
+      // expand it.
+      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+        SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+        ReplaceNode(SDValue(ST, 0), Result);
+      }
+      break;
+    }
+    case TargetLowering::Custom: {
+      SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+      if (Res && Res != SDValue(Node, 0))
+        ReplaceNode(SDValue(Node, 0), Res);
+      return;
+    }
+    case TargetLowering::Expand:
+      assert(!StVT.isVector() &&
+             "Vector Stores are handled in LegalizeVectorOps");
+
+      SDValue Result;
+
+      // TRUNCSTORE:i16 i32 -> STORE i16
+      if (TLI.isTypeLegal(StVT)) {
+        Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+        Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+                              Alignment, MMOFlags, AAInfo);
       } else {
-        switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
-        default: llvm_unreachable("This action is not supported yet!");
-        case TargetLowering::Legal: {
-          EVT MemVT = ST->getMemoryVT();
-          unsigned AS = ST->getAddressSpace();
-          unsigned Align = ST->getAlignment();
-          // If this is an unaligned store and the target doesn't support it,
-          // expand it.
-          if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
-            SDValue Result = TLI.expandUnalignedStore(ST, DAG);
-            ReplaceNode(SDValue(ST, 0), Result);
-          }
-          break;
-        }
-        case TargetLowering::Custom: {
-          SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
-          if (Res && Res != SDValue(Node, 0))
-            ReplaceNode(SDValue(Node, 0), Res);
-          return;
-        }
-        case TargetLowering::Expand:
-          assert(!StVT.isVector() &&
-                 "Vector Stores are handled in LegalizeVectorOps");
-
-          // TRUNCSTORE:i16 i32 -> STORE i16
-          assert(TLI.isTypeLegal(StVT) &&
-                 "Do not know how to expand this store!");
-          Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
-          SDValue Result =
-              DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
-                           Alignment, MMOFlags, AAInfo);
-          ReplaceNode(SDValue(Node, 0), Result);
-          break;
-        }
+        // The in-memory type isn't legal. Truncate to the type it would promote
+        // to, and then do a truncstore.
+        Value = DAG.getNode(ISD::TRUNCATE, dl,
+                            TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
+                            Value);
+        Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+                                   StVT, Alignment, MMOFlags, AAInfo);
       }
+
+      ReplaceNode(SDValue(Node, 0), Result);
+      break;
     }
+  }
 }
 
 void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
@@ -611,6 +656,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
 
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD) {
+    DEBUG(dbgs() << "Legalizing non-extending load operation\n");
     MVT VT = Node->getSimpleValueType(0);
     SDValue RVal = SDValue(Node, 0);
     SDValue RChain = SDValue(Node, 1);
@@ -629,13 +675,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       }
       break;
     }
-    case TargetLowering::Custom: {
+    case TargetLowering::Custom:
       if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
         RVal = Res;
         RChain = Res.getValue(1);
       }
       break;
-    }
+
     case TargetLowering::Promote: {
       MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
       assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
@@ -660,6 +706,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     return;
   }
 
+  DEBUG(dbgs() << "Legalizing extending load operation\n");
   EVT SrcVT = LD->getMemoryVT();
   unsigned SrcWidth = SrcVT.getSizeInBits();
   unsigned Alignment = LD->getAlignment();
@@ -795,7 +842,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     case TargetLowering::Custom:
       isCustom = true;
       LLVM_FALLTHROUGH;
-    case TargetLowering::Legal: {
+    case TargetLowering::Legal:
       Value = SDValue(Node, 0);
       Chain = SDValue(Node, 1);
 
@@ -816,8 +863,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
         }
       }
       break;
-    }
-    case TargetLowering::Expand:
+
+    case TargetLowering::Expand: {
       EVT DestVT = Node->getValueType(0);
       if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
         // If the source type is not legal, see if there is a legal extload to
@@ -883,6 +930,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       Chain = Result.getValue(1);
       break;
     }
+    }
   }
 
   // Since loads produce two values, make sure to remember that we legalized
@@ -907,6 +955,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
     case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
     case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
     case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+    case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
     case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
     case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
     case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@@ -932,7 +981,9 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
 void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
 
-  if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+  // Allow illegal target nodes and illegal registers.
+  if (Node->getOpcode() == ISD::TargetConstant ||
+      Node->getOpcode() == ISD::Register)
     return;
 
 #ifndef NDEBUG
@@ -946,7 +997,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
               TargetLowering::TypeLegal ||
             TLI.isTypeLegal(Op.getValueType()) ||
-            Op.getOpcode() == ISD::TargetConstant) &&
+            Op.getOpcode() == ISD::TargetConstant ||
+            Op.getOpcode() == ISD::Register) &&
             "Unexpected illegal type!");
 #endif
 
@@ -983,11 +1035,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
     break;
   }
-  case ISD::ATOMIC_STORE: {
+  case ISD::ATOMIC_STORE:
     Action = TLI.getOperationAction(Node->getOpcode(),
                                     Node->getOperand(2).getValueType());
     break;
-  }
   case ISD::SELECT_CC:
   case ISD::SETCC:
   case ISD::BR_CC: {
@@ -1072,6 +1123,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     }
     break;
   case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
   case ISD::STRICT_FPOW:
   case ISD::STRICT_FPOWI:
   case ISD::STRICT_FSIN:
@@ -1090,7 +1142,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
                                      Node->getValueType(0));
     break;
-
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -1141,8 +1192,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
         if (SAO != Op2)
           NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
       }
+      break;
     }
-    break;
     }
 
     if (NewNode != Node) {
@@ -1151,8 +1202,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     }
     switch (Action) {
     case TargetLowering::Legal:
+      DEBUG(dbgs() << "Legal node: nothing to do\n");
       return;
-    case TargetLowering::Custom: {
+    case TargetLowering::Custom:
+      DEBUG(dbgs() << "Trying custom legalization\n");
       // FIXME: The handling for custom lowering with multiple results is
       // a complete mess.
       if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
@@ -1160,6 +1213,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
           return;
 
         if (Node->getNumValues() == 1) {
+          DEBUG(dbgs() << "Successfully custom legalized node\n");
           // We can just directly replace this node with the lowered value.
           ReplaceNode(SDValue(Node, 0), Res);
           return;
@@ -1168,11 +1222,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
         SmallVector<SDValue, 8> ResultVals;
         for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
           ResultVals.push_back(Res.getValue(i));
+        DEBUG(dbgs() << "Successfully custom legalized node\n");
         ReplaceNode(Node, ResultVals.data());
         return;
       }
+      DEBUG(dbgs() << "Could not custom legalize node\n");
       LLVM_FALLTHROUGH;
-    }
     case TargetLowering::Expand:
       if (ExpandNode(Node))
         return;
@@ -1198,13 +1253,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   case ISD::CALLSEQ_START:
   case ISD::CALLSEQ_END:
     break;
-  case ISD::LOAD: {
+  case ISD::LOAD:
     return LegalizeLoadOps(Node);
-  }
-  case ISD::STORE: {
+  case ISD::STORE:
     return LegalizeStoreOps(Node);
   }
-  }
 }
 
 SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1240,7 +1293,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
       // If the index is dependent on the store we will introduce a cycle when
       // creating the load (the load uses the index, and by replacing the chain
       // we will make the index dependent on the load). Also, the store might be
-      // dependent on the extractelement and introduce a cycle when creating 
+      // dependent on the extractelement and introduce a cycle when creating
       // the load.
       if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
           ST->hasPredecessor(Op.getNode()))
@@ -1361,22 +1414,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
 }
 
-namespace {
-/// Keeps track of state when getting the sign of a floating-point value as an
-/// integer.
-struct FloatSignAsInt {
-  EVT FloatVT;
-  SDValue Chain;
-  SDValue FloatPtr;
-  SDValue IntPtr;
-  MachinePointerInfo IntPointerInfo;
-  MachinePointerInfo FloatPointerInfo;
-  SDValue IntValue;
-  APInt SignMask;
-  uint8_t SignBit;
-};
-}
-
 /// Bitcast a floating-point value to an integer value. Only bitcast the part
 /// containing the sign bit if the target has no integer value capable of
 /// holding all bits of the floating-point value.
@@ -1753,8 +1790,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
   // We do this in two phases; first to check the legality of the shuffles,
   // and next, assuming that all shuffles are legal, to create the new nodes.
   for (int Phase = 0; Phase < 2; ++Phase) {
-    SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals,
-                                                               NewIntermedVals;
+    SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals,
+                                                              NewIntermedVals;
     for (unsigned i = 0; i < NumElems; ++i) {
       SDValue V = Node->getOperand(i);
       if (V.isUndef())
@@ -1977,10 +2014,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
   // isTailCall may be true since the callee does not reference caller stack
   // frame. Check if it's in the right position and that the return types match.
   SDValue TCChain = InChain;
-  const Function *F = DAG.getMachineFunction().getFunction();
+  const Function &F = DAG.getMachineFunction().getFunction();
   bool isTailCall =
       TLI.isInTailCallPosition(DAG, Node, TCChain) &&
-      (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy());
+      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
   if (isTailCall)
     InChain = TCChain;
 
@@ -1996,10 +2033,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
 
-  if (!CallInfo.second.getNode())
+  if (!CallInfo.second.getNode()) {
+    DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
     // It's a tailcall, return the chain (which is the DAG root).
     return DAG.getRoot();
+  }
 
+  DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
   return CallInfo.first;
 }
 
@@ -2285,9 +2325,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
                                                    EVT DestVT,
                                                    const SDLoc &dl) {
   // TODO: Should any fast-math-flags be set for the created nodes?
-
+  DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
   if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
-    // simple 32-bit [signed|unsigned] integer to float/double expansion
+    DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+                    "expansion\n");
 
     // Get the stack frame index of a 8 byte buffer.
     SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
@@ -2352,6 +2393,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
   // and in all alternate rounding modes.
   // TODO: Generalize this for use with other types.
   if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+    DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
     SDValue TwoP52 =
       DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
     SDValue TwoP84PlusTwoP52 =
@@ -2372,9 +2414,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
     return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
   }
 
-  // Implementation of unsigned i64 to f32.
   // TODO: Generalize this for use with other types.
   if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
     // For unsigned conversions, convert them to signed conversions using the
     // algorithm from the x86_64 __floatundidf in compiler_rt.
     if (!isSigned) {
@@ -2498,7 +2540,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
   unsigned OpToUse = 0;
 
   // Scan for the appropriate larger type to use.
-  while (1) {
+  while (true) {
     NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
     assert(NewInTy.isInteger() && "Ran out of possibilities!");
 
@@ -2539,7 +2581,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
   unsigned OpToUse = 0;
 
   // Scan for the appropriate larger type to use.
-  while (1) {
+  while (true) {
     NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
     assert(NewOutTy.isInteger() && "Ran out of possibilities!");
 
@@ -2559,7 +2601,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
     // Otherwise, try a larger type.
   }
 
-
   // Okay, we found the operation and type to use.
   SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
 
@@ -2745,7 +2786,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
   case ISD::CTLZ: {
     EVT VT = Op.getValueType();
-    unsigned len = VT.getSizeInBits();
+    unsigned Len = VT.getSizeInBits();
 
     if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
       EVT SetCCVT = getSetCCResultType(VT);
@@ -2753,7 +2794,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
       SDValue Zero = DAG.getConstant(0, dl, VT);
       SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
       return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
-                         DAG.getConstant(len, dl, VT), CTLZ);
+                         DAG.getConstant(Len, dl, VT), CTLZ);
     }
 
     // for now, we do this:
@@ -2766,7 +2807,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     //
     // Ref: "Hacker's Delight" by Henry Warren
     EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+    for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
       SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
       Op = DAG.getNode(ISD::OR, dl, VT, Op,
                        DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
@@ -2778,11 +2819,22 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     // This trivially expands to CTTZ.
     return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
   case ISD::CTTZ: {
+    EVT VT = Op.getValueType();
+    unsigned Len = VT.getSizeInBits();
+
+    if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+      EVT SetCCVT = getSetCCResultType(VT);
+      SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+      SDValue Zero = DAG.getConstant(0, dl, VT);
+      SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+      return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+                         DAG.getConstant(Len, dl, VT), CTTZ);
+    }
+
     // for now, we use: { return popcount(~x & (x - 1)); }
     // unless the target has ctlz but not ctpop, in which case we use:
     // { return 32 - nlz(~x & (x-1)); }
     // Ref: "Hacker's Delight" by Henry Warren
-    EVT VT = Op.getValueType();
     SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
                                DAG.getNOT(dl, Op, VT),
                                DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -2799,6 +2851,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
 }
 
 bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+  DEBUG(dbgs() << "Trying to expand node\n");
   SmallVector<SDValue, 8> Results;
   SDLoc dl(Node);
   SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2983,8 +3036,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     // NOTE: we could fall back on load/store here too for targets without
     // SRA.  However, it is doubtful that any exist.
     EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-    if (VT.isVector())
-      ShiftAmountTy = VT;
     unsigned BitsDiff = VT.getScalarSizeInBits() -
                         ExtraVT.getScalarSizeInBits();
     SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
@@ -3062,10 +3113,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   case ISD::INSERT_SUBVECTOR:
     Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
     break;
-  case ISD::CONCAT_VECTORS: {
+  case ISD::CONCAT_VECTORS:
     Results.push_back(ExpandVectorBuildThroughStack(Node));
     break;
-  }
   case ISD::SCALAR_TO_VECTOR:
     Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
     break;
@@ -3083,14 +3133,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     SDValue Op0 = Node->getOperand(0);
     SDValue Op1 = Node->getOperand(1);
     if (!TLI.isTypeLegal(EltVT)) {
-
       EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
 
       // BUILD_VECTOR operands are allowed to be wider than the element type.
       // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept
       // it.
       if (NewEltVT.bitsLT(EltVT)) {
-
         // Convert shuffle node.
         // If original node was v4i64 and the new EltVT is i32,
         // cast operands to v8i32 and re-build the mask.
@@ -3261,6 +3309,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     }
     break;
   case ISD::FP_TO_FP16:
+    DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
     if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
       SDValue Op = Node->getOperand(0);
       MVT SVT = Op.getSimpleValueType();
@@ -3457,7 +3506,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
     //   Sub:
     //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
-    //
     SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
     SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
     SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
@@ -3666,10 +3714,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                          Tmp2.getOperand(0), Tmp2.getOperand(1),
                          Node->getOperand(2));
     } else {
-      // We test only the i1 bit.  Skip the AND if UNDEF.
-      Tmp3 = (Tmp2.isUndef()) ? Tmp2 :
-        DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
-                    DAG.getConstant(1, dl, Tmp2.getValueType()));
+      // We test only the i1 bit.  Skip the AND if UNDEF or another AND.
+      if (Tmp2.isUndef() ||
+          (Tmp2.getOpcode() == ISD::AND &&
+           isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
+           dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+        Tmp3 = Tmp2;
+      else
+        Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+                           DAG.getConstant(1, dl, Tmp2.getValueType()));
       Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
                          DAG.getCondCode(ISD::SETNE), Tmp3,
                          DAG.getConstant(0, dl, Tmp3.getValueType()),
@@ -3865,17 +3918,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   }
 
   // Replace the original node with the legalized result.
-  if (Results.empty())
+  if (Results.empty()) {
+    DEBUG(dbgs() << "Cannot expand node\n");
     return false;
+  }
 
+  DEBUG(dbgs() << "Succesfully expanded node\n");
   ReplaceNode(Node, Results.data());
   return true;
 }
 
 void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+  DEBUG(dbgs() << "Trying to convert node to libcall\n");
   SmallVector<SDValue, 8> Results;
   SDLoc dl(Node);
-  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
   unsigned Opc = Node->getOpcode();
   switch (Opc) {
   case ISD::ATOMIC_FENCE: {
@@ -4057,6 +4113,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                                       RTLIB::REM_PPCF128));
     break;
   case ISD::FMA:
+  case ISD::STRICT_FMA:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
                                       RTLIB::FMA_F80, RTLIB::FMA_F128,
                                       RTLIB::FMA_PPCF128));
@@ -4126,8 +4183,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
   }
 
   // Replace the original node with the legalized result.
-  if (!Results.empty())
+  if (!Results.empty()) {
+    DEBUG(dbgs() << "Successfully converted node to libcall\n");
     ReplaceNode(Node, Results.data());
+  } else
+    DEBUG(dbgs() << "Could not convert node to libcall\n");
 }
 
 // Determine the vector type to use in place of an original scalar element when
@@ -4141,6 +4201,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
 }
 
 void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+  DEBUG(dbgs() << "Trying to promote node\n");
   SmallVector<SDValue, 8> Results;
   MVT OVT = Node->getSimpleValueType(0);
   if (Node->getOpcode() == ISD::UINT_TO_FP ||
@@ -4369,7 +4430,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::FREM:
   case ISD::FMINNUM:
   case ISD::FMAXNUM:
-  case ISD::FPOW: {
+  case ISD::FPOW:
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
     Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
@@ -4377,8 +4438,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                   Tmp3, DAG.getIntPtrConstant(0, dl)));
     break;
-  }
-  case ISD::FMA: {
+  case ISD::FMA:
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
     Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
@@ -4387,7 +4447,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
                     DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
                     DAG.getIntPtrConstant(0, dl)));
     break;
-  }
   case ISD::FCOPYSIGN:
   case ISD::FPOWI: {
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4419,13 +4478,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::FLOG10:
   case ISD::FABS:
   case ISD::FEXP:
-  case ISD::FEXP2: {
+  case ISD::FEXP2:
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
     Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                   Tmp2, DAG.getIntPtrConstant(0, dl)));
     break;
-  }
   case ISD::BUILD_VECTOR: {
     MVT EltVT = OVT.getVectorElementType();
     MVT NewEltVT = NVT.getVectorElementType();
@@ -4579,8 +4637,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   }
 
   // Replace the original node with the legalized result.
-  if (!Results.empty())
+  if (!Results.empty()) {
+    DEBUG(dbgs() << "Successfully promoted node\n");
     ReplaceNode(Node, Results.data());
+  } else
+    DEBUG(dbgs() << "Could not promote node\n");
 }
 
 /// This is the entry point for the file.
@@ -4602,7 +4663,7 @@ void SelectionDAG::Legalize() {
   // nodes with their original operands intact. Legalization can produce
   // new nodes which may themselves need to be legalized. Iterate until all
   // nodes have been legalized.
-  for (;;) {
+  while (true) {
     bool AnyLegalized = false;
     for (auto NI = allnodes_end(); NI != allnodes_begin();) {
       --NI;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d48..29f0bb475b08 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -40,8 +40,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   SDValue Res = SDValue();
 
   // See if the target wants to custom expand this node.
-  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+  if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+    DEBUG(dbgs() << "Node has been custom expanded, done\n");
     return;
+  }
 
   switch (N->getOpcode()) {
   default:
@@ -568,10 +570,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
 
 SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
   SDValue Mask = N->getOperand(0);
-  EVT OpTy = N->getOperand(1).getValueType();
 
-  // Promote all the way up to the canonical SetCC type.
-  Mask = PromoteTargetBoolean(Mask, OpTy);
   SDValue LHS = GetPromotedInteger(N->getOperand(1));
   SDValue RHS = GetPromotedInteger(N->getOperand(2));
   return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -773,7 +772,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
 SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
   if (ResNo == 1)
     return PromoteIntRes_Overflow(N);
-  llvm_unreachable("Not implemented");
+
+  // We need to sign-extend the operands so the carry value computed by the
+  // wide operation will be equivalent to the carry value computed by the
+  // narrow operation.
+  // An ADDCARRY can generate carry only if any of the operands has its
+  // most significant bit set. Sign extension propagates the most significant
+  // bit into the higher bits which means the extra bit that the narrow
+  // addition would need (i.e. the carry) will be propagated through the higher
+  // bits of the wide addition.
+  // A SUBCARRY can generate borrow only if LHS < RHS and this property will be
+  // preserved by sign extension.
+  SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+
+  EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
+
+  // Do the arithmetic in the wide type.
+  SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
+                            LHS, RHS, N->getOperand(2));
+
+  // Update the users of the original carry/borrow value.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+  return SDValue(Res.getNode(), 0);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
@@ -885,8 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
-  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+    DEBUG(dbgs() << "Node has been custom lowered, done\n");
     return false;
+  }
 
   switch (N->getOpcode()) {
     default:
@@ -1206,24 +1230,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
     // When the data operand has illegal type, we should legalize the data
     // operand first. The mask will be promoted/splitted/widened according to
     // the data operand type.
-    if (TLI.isTypeLegal(DataVT))
+    if (TLI.isTypeLegal(DataVT)) {
       Mask = PromoteTargetBoolean(Mask, DataVT);
-    else {
-      if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
-        return PromoteIntOp_MSTORE(N, 3);
-
-      else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
-        return WidenVecOp_MSTORE(N, 3);
-
-      else {
-        assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
-        return SplitVecOp_MSTORE(N, 3);
-      }
+      // Update in place.
+      SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+      NewOps[2] = Mask;
+      return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
     }
+
+    if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+      return PromoteIntOp_MSTORE(N, 3);
+    if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+      return WidenVecOp_MSTORE(N, 3);
+    assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+    return SplitVecOp_MSTORE(N, 3);
   } else { // Data operand
     assert(OpNo == 3 && "Unexpected operand for promotion");
     DataOp = GetPromotedInteger(DataOp);
-    Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
     TruncateStore = true;
   }
 
@@ -1250,6 +1273,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
     // The Mask
     EVT DataVT = N->getValueType(0);
     NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+  } else if (OpNo == 4) {
+    // Need to sign extend the index since the bits will likely be used.
+    NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
   } else
     NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
 
@@ -1270,6 +1296,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
     // The Mask
     EVT DataVT = N->getValue().getValueType();
     NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+  } else if (OpNo == 4) {
+    // Need to sign extend the index since the bits will likely be used.
+    NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
   } else
     NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
   return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
@@ -3224,8 +3253,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
 
     // Increment the pointer to the other half.
     unsigned IncrementSize = NVT.getSizeInBits()/8;
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
     Hi = DAG.getTruncStore(
         Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
         MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
@@ -3260,8 +3288,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
                          MMOFlags, AAInfo);
 
   // Increment the pointer to the other half.
-  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
   // Store the lowest ExcessBits bits in the second half.
   Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
                          N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3462,7 +3489,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
 
-  EVT InElemTy = OutVT.getVectorElementType();
   EVT OutElemTy = NOutVT.getVectorElementType();
 
   unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
@@ -3471,15 +3497,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
   assert(NumElem * NumOperands == NumOutElem &&
          "Unexpected number of elements");
 
+  // If the input type is legal and we can promote it to a legal type with the
+  // same element size, go ahead do that to create a new concat.
+  if (getTypeAction(N->getOperand(0).getValueType()) ==
+      TargetLowering::TypeLegal) {
+    EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
+    if (TLI.isTypeLegal(InPromotedTy)) {
+      SmallVector<SDValue, 8> Ops(NumOperands);
+      for (unsigned i = 0; i < NumOperands; ++i) {
+        Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
+                             N->getOperand(i));
+      }
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
+    }
+  }
+
   // Take the elements from the first vector.
   SmallVector<SDValue, 8> Ops(NumOutElem);
   for (unsigned i = 0; i < NumOperands; ++i) {
     SDValue Op = N->getOperand(i);
+    if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
+      Op = GetPromotedInteger(Op);
+    EVT SclrTy = Op.getValueType().getVectorElementType();
+    assert(NumElem == Op.getValueType().getVectorNumElements() &&
+           "Unexpected number of elements");
+
     for (unsigned j = 0; j < NumElem; ++j) {
       SDValue Ext = DAG.getNode(
-          ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op,
+          ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
           DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-      Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+      Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
     }
   }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 001eed9fb8f6..b60d7bca498a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -14,7 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/CommandLine.h"
@@ -222,15 +224,21 @@ bool DAGTypeLegalizer::run() {
     assert(N->getNodeId() == ReadyToProcess &&
            "Node should be ready if on worklist!");
 
-    if (IgnoreNodeResults(N))
+    DEBUG(dbgs() << "Legalizing node: "; N->dump());
+    if (IgnoreNodeResults(N)) {
+      DEBUG(dbgs() << "Ignoring node results\n");
       goto ScanOperands;
+    }
 
     // Scan the values produced by the node, checking to see if any result
     // types are illegal.
     for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
       EVT ResultVT = N->getValueType(i);
+      DEBUG(dbgs() << "Analyzing result type: " <<
+                      ResultVT.getEVTString() << "\n");
       switch (getTypeAction(ResultVT)) {
       case TargetLowering::TypeLegal:
+        DEBUG(dbgs() << "Legal result type\n");
         break;
       // The following calls must take care of *all* of the node's results,
       // not just the illegal result they were passed (this includes results
@@ -287,9 +295,12 @@ ScanOperands:
       if (IgnoreNodeResults(N->getOperand(i).getNode()))
         continue;
 
-      EVT OpVT = N->getOperand(i).getValueType();
+      const auto Op = N->getOperand(i);
+      DEBUG(dbgs() << "Analyzing operand: "; Op.dump());
+      EVT OpVT = Op.getValueType();
       switch (getTypeAction(OpVT)) {
       case TargetLowering::TypeLegal:
+        DEBUG(dbgs() << "Legal operand\n");
         continue;
       // The following calls must either replace all of the node's results
       // using ReplaceValueWith, and return "false"; or update the node's
@@ -832,6 +843,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
   AnalyzeNewValue(Lo);
   AnalyzeNewValue(Hi);
 
+  // Transfer debug values. Don't invalidate the source debug value until it's
+  // been transferred to the high and low bits.
+  if (DAG.getDataLayout().isBigEndian()) {
+    DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false);
+    DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(),
+                          Lo.getValueSizeInBits());
+  } else {
+    DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false);
+    DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(),
+                          Hi.getValueSizeInBits());
+  }
+
   // Remember that this is the result of the node.
   std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
   assert(!Entry.first.getNode() && "Node already expanded");
@@ -1002,8 +1025,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
   // Update the widening map.
   assert(Results.size() == N->getNumValues() &&
          "Custom lowering returned the wrong number of results!");
-  for (unsigned i = 0, e = Results.size(); i != e; ++i)
-    SetWidenedVector(SDValue(N, i), Results[i]);
+  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+    // If this is a chain output just replace it.
+    if (Results[i].getValueType() == MVT::Other)
+      ReplaceValueWith(SDValue(N, i), Results[i]);
+    else
+      SetWidenedVector(SDValue(N, i), Results[i]);
+  }
   return true;
 }
 
@@ -1117,23 +1145,6 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
   return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
 }
 
-/// Widen the given target boolean to a target boolean of the given type.
-/// The boolean vector is widened and then promoted to match the target boolean
-/// type of the given ValVT.
-SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
-                                             bool WithZeroes) {
-  SDLoc dl(Bool);
-  EVT BoolVT = Bool.getValueType();
-
-  assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
-         TLI.isTypeLegal(ValVT) &&
-         "Unexpected types in WidenTargetBoolean");
-  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
-                                ValVT.getVectorNumElements());
-  Bool = ModifyToType(Bool, WideVT, WithZeroes);
-  return PromoteTargetBoolean(Bool, ValVT);
-}
-
 /// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
 void DAGTypeLegalizer::SplitInteger(SDValue Op,
                                     EVT LoVT, EVT HiVT,
@@ -1142,9 +1153,14 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
   assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
          Op.getValueSizeInBits() && "Invalid integer splitting!");
   Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+  unsigned ReqShiftAmountInBits =
+      Log2_32_Ceil(Op.getValueType().getSizeInBits());
+  MVT ShiftAmountTy =
+      TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType());
+  if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits())
+    ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits));
   Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
-                   DAG.getConstant(LoVT.getSizeInBits(), dl,
-                                   TLI.getPointerTy(DAG.getDataLayout())));
+                   DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy));
   Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c46d1b04804c..64cb80e0d853 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -18,9 +18,9 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
 
@@ -89,7 +89,8 @@ private:
 
   /// Pretend all of this node's results are legal.
   bool IgnoreNodeResults(SDNode *N) const {
-    return N->getOpcode() == ISD::TargetConstant;
+    return N->getOpcode() == ISD::TargetConstant ||
+           N->getOpcode() == ISD::Register;
   }
 
   /// For integer nodes that are below legal width, this map indicates what
@@ -182,10 +183,6 @@ private:
 
   SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
 
-  /// Modify Bit Vector to match SetCC result type of ValVT.
-  /// The bit vector is widened with zeroes when WithZeroes is true.
-  SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
-
   void ReplaceValueWith(SDValue From, SDValue To);
   void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
   void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -400,18 +397,22 @@ private:
   /// Given an operand Op of Float type, returns the integer if the Op is not
   /// supported in target HW and converted to the integer.
   /// The integer contains exactly the same bits as Op - only the type changed.
-  /// For example, if Op is an f32 which was softened to an i32, then this method
-  /// returns an i32, the bits of which coincide with those of Op.
+  /// For example, if Op is an f32 which was softened to an i32, then this
+  /// method returns an i32, the bits of which coincide with those of Op.
   /// If the Op can be efficiently supported in target HW or the operand must
   /// stay in a register, the Op is not converted to an integer.
   /// In that case, the given op is returned.
   SDValue GetSoftenedFloat(SDValue Op) {
-    SDValue &SoftenedOp = SoftenedFloats[Op];
-    if (!SoftenedOp.getNode() &&
-        isSimpleLegalType(Op.getValueType()))
+    auto Iter = SoftenedFloats.find(Op);
+    if (Iter == SoftenedFloats.end()) {
+      assert(isSimpleLegalType(Op.getValueType()) &&
+             "Operand wasn't converted to integer?");
       return Op;
+    }
+
+    SDValue &SoftenedOp = Iter->second;
+    assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
     RemapValue(SoftenedOp);
-    assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
     return SoftenedOp;
   }
   void SetSoftenedFloat(SDValue Op, SDValue Result);
@@ -618,7 +619,6 @@ private:
   SDValue ScalarizeVecRes_SETCC(SDNode *N);
   SDValue ScalarizeVecRes_UNDEF(SDNode *N);
   SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
-  SDValue ScalarizeVecRes_VSETCC(SDNode *N);
 
   // Vector Operand Scalarization: <1 x ty> -> ty.
   bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -727,7 +727,6 @@ private:
   SDValue WidenVecRes_SETCC(SDNode* N);
   SDValue WidenVecRes_UNDEF(SDNode *N);
   SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
-  SDValue WidenVecRes_VSETCC(SDNode* N);
 
   SDValue WidenVecRes_Ternary(SDNode *N);
   SDValue WidenVecRes_Binary(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index f3306151d864..993465ae9dc2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -484,8 +484,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
                     St->getMemOperand()->getFlags(), AAInfo);
 
-  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
   Hi = DAG.getStore(Chain, dl, Hi, Ptr,
                     St->getPointerInfo().getWithOffset(IncrementSize),
                     MinAlign(Alignment, IncrementSize),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9355dbe77f94..74970ab5792c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1,4 +1,4 @@
-//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -27,15 +27,34 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
 using namespace llvm;
 
 namespace {
+
 class VectorLegalizer {
   SelectionDAG& DAG;
   const TargetLowering &TLI;
-  bool Changed; // Keep track of whether anything changed
+  bool Changed = false; // Keep track of whether anything changed
 
   /// For nodes that are of legal width, and that have more than one use, this
   /// map indicates what regularized operand to use.  This allows us to avoid
@@ -128,12 +147,15 @@ class VectorLegalizer {
   SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
 
 public:
+  VectorLegalizer(SelectionDAG& dag) :
+      DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
   /// \brief Begin legalizer the vector operations in the DAG.
   bool Run();
-  VectorLegalizer(SelectionDAG& dag) :
-      DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
 };
 
+} // end anonymous namespace
+
 bool VectorLegalizer::Run() {
   // Before we start legalizing vector nodes, check if there are any vectors.
   bool HasVectors = false;
@@ -475,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
          "Can't promote a vector with multiple results!");
   EVT VT = Op.getValueType();
 
-  EVT NewVT;
+  EVT NewVT = VT;
   unsigned NewOpc;
-  while (1) {
-    NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+  while (true) {
+    NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
     assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
     if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
       NewOpc = ISD::FP_TO_SINT;
@@ -490,12 +512,19 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
     }
   }
 
-  SDLoc loc(Op);
-  SDValue promoted  = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
-  return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+  SDLoc dl(Op);
+  SDValue Promoted  = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
+
+  // Assert that the converted value fits in the original type.  If it doesn't
+  // (eg: because the value being converted is too big), then the result of the
+  // original operation was undefined anyway, so the assert is still correct.
+  Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+                                                            : ISD::AssertSext,
+                         dl, NewVT, Promoted,
+                         DAG.getValueType(VT.getScalarType()));
+  return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
 }
 
-
 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
 
@@ -503,7 +532,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
   EVT SrcEltVT = SrcVT.getScalarType();
   unsigned NumElem = SrcVT.getVectorNumElements();
 
-
   SDValue NewChain;
   SDValue Value;
   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
@@ -534,7 +562,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
     unsigned Offset = 0;
     unsigned RemainingBytes = SrcVT.getStoreSize();
     SmallVector<SDValue, 8> LoadVals;
-
     while (RemainingBytes > 0) {
       SDValue ScalarLoad;
       unsigned LoadBytes = WideBytes;
@@ -560,9 +587,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
 
       RemainingBytes -= LoadBytes;
       Offset += LoadBytes;
-      BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
-                            DAG.getConstant(LoadBytes, dl,
-                                            BasePTR.getValueType()));
+
+      BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
 
       LoadVals.push_back(ScalarLoad.getValue(0));
       LoadChains.push_back(ScalarLoad.getValue(1));
@@ -1117,8 +1143,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
   return DAG.getBuildVector(VT, dl, Ops);
 }
 
-}
-
 bool SelectionDAG::LegalizeVectors() {
   return VectorLegalizer(*this).Run();
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6aa3270883f0..8f2320f52a0f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -243,7 +243,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
   // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
   // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
   // legal and was not scalarized.
-  // See the similar logic in ScalarizeVecRes_VSETCC
+  // See the similar logic in ScalarizeVecRes_SETCC
   if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
     Op = GetScalarizedVector(Op);
   } else {
@@ -307,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
   SDLoc DL(N);
   // The vselect result and true/value operands needs scalarizing, but it's
   // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
-  // See the similar logic in ScalarizeVecRes_VSETCC
+  // See the similar logic in ScalarizeVecRes_SETCC
   if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
     Cond = GetScalarizedVector(Cond);
   } else {
@@ -380,21 +380,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
                      N->getOperand(4));
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
-  assert(N->getValueType(0).isVector() ==
-         N->getOperand(0).getValueType().isVector() &&
-         "Scalar/Vector type mismatch");
-
-  if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
-
-  SDValue LHS = GetScalarizedVector(N->getOperand(0));
-  SDValue RHS = GetScalarizedVector(N->getOperand(1));
-  SDLoc DL(N);
-
-  // Turn it into a scalar SETCC.
-  return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
-}
-
 SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
   return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
 }
@@ -408,7 +393,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
   return GetScalarizedVector(N->getOperand(Op));
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
          "Operand types must be vectors");
@@ -461,7 +446,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
       N->dump(&DAG);
       dbgs() << "\n";
 #endif
-      llvm_unreachable("Do not know how to scalarize this operator's operand!");
+      report_fatal_error("Do not know how to scalarize this operator's "
+                         "operand!\n");
     case ISD::BITCAST:
       Res = ScalarizeVecOp_BITCAST(N);
       break;
@@ -1068,34 +1054,57 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   if (CustomLowerNode(N, N->getValueType(0), true))
     return;
 
-  // Spill the vector to the stack.
+  // Make the vector elements byte-addressable if they aren't already.
   EVT VecVT = Vec.getValueType();
   EVT EltVT = VecVT.getVectorElementType();
+  if (VecVT.getScalarSizeInBits() < 8) {
+    EltVT = MVT::i8;
+    VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+                             VecVT.getVectorNumElements());
+    Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+    // Extend the element type to match if needed.
+    if (EltVT.bitsGT(Elt.getValueType()))
+      Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
+  }
+
+  // Spill the vector to the stack.
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  SDValue Store =
-      DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+  auto &MF = DAG.getMachineFunction();
+  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
 
   // Store the new element.  This may be larger than the vector element type,
   // so use a truncating store.
   SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
   Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
   unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
-  Store =
-      DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
+  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr,
+                            MachinePointerInfo::getUnknownStack(MF), EltVT);
+
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
 
   // Load the Lo part from the stack slot.
-  Lo =
-      DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+  Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo);
 
   // Increment the pointer to the other part.
-  unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
+  unsigned IncrementSize = LoVT.getSizeInBits() / 8;
   StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                          DAG.getConstant(IncrementSize, dl,
                                          StackPtr.getValueType()));
 
   // Load the Hi part from the stack slot.
-  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+  Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
+                   PtrInfo.getWithOffset(IncrementSize),
                    MinAlign(Alignment, IncrementSize));
+
+  // If we adjusted the original type, we need to truncate the results.
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  if (LoVT != Lo.getValueType())
+    Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
+  if (HiVT != Hi.getValueType())
+    Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }
 
 void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -1130,8 +1139,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                    LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
 
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
   Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
                    LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
                    Alignment, MMOFlags, AAInfo);
@@ -1283,10 +1291,19 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
   SDLoc DL(N);
   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
-  // Split the input.
+  // If the input also splits, handle it directly. Otherwise split it by hand.
   SDValue LL, LH, RL, RH;
-  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+  if (getTypeAction(N->getOperand(0).getValueType()) ==
+      TargetLowering::TypeSplitVector)
+    GetSplitVector(N->getOperand(0), LL, LH);
+  else
+    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+
+  if (getTypeAction(N->getOperand(1).getValueType()) ==
+      TargetLowering::TypeSplitVector)
+    GetSplitVector(N->getOperand(1), RL, RH);
+  else
+    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
 
   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -1753,30 +1770,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   // Make the vector elements byte-addressable if they aren't already.
   SDLoc dl(N);
   EVT EltVT = VecVT.getVectorElementType();
-  if (EltVT.getSizeInBits() < 8) {
-    SmallVector<SDValue, 4> ElementOps;
-    for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
-      ElementOps.push_back(DAG.getAnyExtOrTrunc(
-          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
-                      DAG.getConstant(i, dl, MVT::i8)),
-          dl, MVT::i8));
-    }
-
+  if (VecVT.getScalarSizeInBits() < 8) {
     EltVT = MVT::i8;
     VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                              VecVT.getVectorNumElements());
-    Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
+    Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
   }
 
   // Store the vector to the stack.
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  SDValue Store =
-      DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+  auto &MF = DAG.getMachineFunction();
+  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
 
   // Load back the required element.
   StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
-  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
-                        MachinePointerInfo(), EltVT);
+  return DAG.getExtLoad(
+      ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -1886,9 +1898,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
   else
     std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
 
-  MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
-  MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
-
   // if Alignment is equal to the vector size,
   // take the half of it for the second part
   unsigned SecondHalfAlignment =
@@ -1955,7 +1964,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
   else
     std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
 
-  SDValue Lo, Hi;
+  SDValue Lo;
   MachineMemOperand *MMO = DAG.getMachineFunction().
     getMachineMemOperand(N->getPointerInfo(),
                          MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
@@ -1970,13 +1979,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
                          MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
                          Alignment, N->getAAInfo(), N->getRanges());
 
-  SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
-  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
-                            DL, OpsHi, MMO);
-
-  // Build a factor node to remember that this store is independent of the
-  // other one.
-  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+  // The order of the Scatter operation after split is well defined. The "Hi"
+  // part comes after the "Lo". So these two operations should be chained one
+  // after another.
+  SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi};
+  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+                              DL, OpsHi, MMO);
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2007,8 +2015,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
                       AAInfo);
 
   // Increment the pointer to the other half.
-  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+  Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
 
   if (isTruncating)
     Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
@@ -2919,30 +2926,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
   ISD::LoadExtType ExtType = N->getExtensionType();
   SDLoc dl(N);
 
-  if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
-    Mask = GetWidenedVector(Mask);
-  else {
-    EVT BoolVT = getSetCCResultType(WidenVT);
-
-    // We can't use ModifyToType() because we should fill the mask with
-    // zeroes
-    unsigned WidenNumElts = BoolVT.getVectorNumElements();
-    unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
-    unsigned NumConcat = WidenNumElts / MaskNumElts;
-    SmallVector<SDValue, 16> Ops(NumConcat);
-    SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
-    Ops[0] = Mask;
-    for (unsigned i = 1; i != NumConcat; ++i)
-      Ops[i] = ZeroVal;
-
-    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
-  }
+  // The mask should be widened as well
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                    MaskVT.getVectorElementType(),
+                                    WidenVT.getVectorNumElements());
+  Mask = ModifyToType(Mask, WideMaskVT, true);
 
   SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
                                   Mask, Src0, N->getMemoryVT(),
                                   N->getMemOperand(), ExtType,
-	                                N->isExpandingLoad());
+                                        N->isExpandingLoad());
   // Legalize the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2953,12 +2946,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
 
   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Mask = N->getMask();
+  EVT MaskVT = Mask.getValueType();
   SDValue Src0 = GetWidenedVector(N->getValue());
   unsigned NumElts = WideVT.getVectorNumElements();
   SDLoc dl(N);
 
   // The mask should be widened as well
-  Mask = WidenTargetBoolean(Mask, WideVT, true);
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                    MaskVT.getVectorElementType(),
+                                    WideVT.getVectorNumElements());
+  Mask = ModifyToType(Mask, WideMaskVT, true);
 
   // Widen the Index operand
   SDValue Index = N->getIndex();
@@ -3032,7 +3029,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
 
   // Make a new Mask node, with a legal result VT.
   SmallVector<SDValue, 4> Ops;
-  for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+  for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
     Ops.push_back(InMask->getOperand(i));
   SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
 
@@ -3065,12 +3062,9 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
   } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
     unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
     EVT SubVT = Mask->getValueType(0);
-    SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
-    SubConcatOps[0] = Mask;
-    for (unsigned i = 1; i < NumSubVecs; ++i)
-      SubConcatOps[i] = DAG.getUNDEF(SubVT);
-    Mask =
-        DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+    SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+    SubOps[0] = Mask;
+    Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
   }
 
   assert((Mask->getValueType(0) == ToMaskVT) &&
@@ -3105,7 +3099,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
 
   // If this is a splitted VSELECT that was previously already handled, do
   // nothing.
-  if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+  EVT CondVT = Cond->getValueType(0);
+  if (CondVT.getScalarSizeInBits() != 1)
     return SDValue();
 
   EVT VSelVT = N->getValueType(0);
@@ -3129,6 +3124,14 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
     EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
     if (SetCCResVT.getScalarSizeInBits() == 1)
       return SDValue();
+  } else if (CondVT.getScalarType() == MVT::i1) {
+    // If there is support for an i1 vector mask (or only scalar i1 conditions),
+    // don't touch.
+    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+    if (CondVT.getScalarType() == MVT::i1)
+      return SDValue();
   }
 
   // Get the VT and operands for VSELECT, and widen if needed.
@@ -3236,19 +3239,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
                      N->getOperand(1), InOp1, InOp2, N->getOperand(4));
 }
 
-SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
-  assert(N->getValueType(0).isVector() ==
-         N->getOperand(0).getValueType().isVector() &&
-         "Scalar/Vector type mismatch");
-  if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
-
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
-  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-  return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
-                     InOp1, InOp2, N->getOperand(2));
-}
-
 SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  return DAG.getUNDEF(WidenVT);
@@ -3279,7 +3269,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
   return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
 }
 
-SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
          "Operands must be vectors");
@@ -3556,6 +3546,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 3 && "Can widen only data operand of mstore");
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
   EVT MaskVT = Mask.getValueType();
@@ -3564,25 +3555,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
   SDValue WideVal = GetWidenedVector(StVal);
   SDLoc dl(N);
 
-  if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
-    Mask = GetWidenedVector(Mask);
-  else {
-    // The mask should be widened as well.
-    EVT BoolVT = getSetCCResultType(WideVal.getValueType());
-    // We can't use ModifyToType() because we should fill the mask with
-    // zeroes.
-    unsigned WidenNumElts = BoolVT.getVectorNumElements();
-    unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
-    unsigned NumConcat = WidenNumElts / MaskNumElts;
-    SmallVector<SDValue, 16> Ops(NumConcat);
-    SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
-    Ops[0] = Mask;
-    for (unsigned i = 1; i != NumConcat; ++i)
-      Ops[i] = ZeroVal;
+  // The mask should be widened as well.
+  EVT WideVT = WideVal.getValueType();
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                    MaskVT.getVectorElementType(),
+                                    WideVT.getVectorNumElements());
+  Mask = ModifyToType(Mask, WideMaskVT, true);
 
-    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
-  }
   assert(Mask.getValueType().getVectorNumElements() ==
          WideVal.getValueType().getVectorNumElements() &&
          "Mask and data vectors should have the same number of elements");
@@ -3596,15 +3575,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue DataOp = MSC->getValue();
   SDValue Mask = MSC->getMask();
+  EVT MaskVT = Mask.getValueType();
 
   // Widen the value.
   SDValue WideVal = GetWidenedVector(DataOp);
   EVT WideVT = WideVal.getValueType();
-  unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+  unsigned NumElts = WideVT.getVectorNumElements();
   SDLoc dl(N);
 
   // The mask should be widened as well.
-  Mask = WidenTargetBoolean(Mask, WideVT, true);
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                    MaskVT.getVectorElementType(), NumElts);
+  Mask = ModifyToType(Mask, WideMaskVT, true);
 
   // Widen index.
   SDValue Index = MSC->getIndex();
@@ -3806,8 +3788,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
   while (LdWidth > 0) {
     unsigned Increment = NewVTWidth / 8;
     Offset += Increment;
-    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                          DAG.getConstant(Increment, dl, BasePtr.getValueType()));
+    BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
 
     SDValue L;
     if (LdWidth < NewVTWidth) {
@@ -3839,7 +3820,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
     }
 
     LdOps.push_back(L);
-
+    LdOp = L;
 
     LdWidth -= NewVTWidth;
   }
@@ -3929,10 +3910,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
   LdChain.push_back(Ops[0].getValue(1));
   unsigned i = 0, Offset = Increment;
   for (i=1; i < NumElts; ++i, Offset += Increment) {
-    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                     BasePtr,
-                                     DAG.getConstant(Offset, dl,
-                                                     BasePtr.getValueType()));
+    SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
     Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
                             LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
                             Align, MMOFlags, AAInfo);
@@ -3987,9 +3965,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
         StWidth -= NewVTWidth;
         Offset += Increment;
         Idx += NumVTElts;
-        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                              DAG.getConstant(Increment, dl,
-                                              BasePtr.getValueType()));
+
+        BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
       } while (StWidth != 0 && StWidth >= NewVTWidth);
     } else {
       // Cast the vector to the scalar type we can store.
@@ -4008,9 +3985,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
             MinAlign(Align, Offset), MMOFlags, AAInfo));
         StWidth -= NewVTWidth;
         Offset += Increment;
-        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                              DAG.getConstant(Increment, dl,
-                                              BasePtr.getValueType()));
+        BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
       } while (StWidth != 0 && StWidth >= NewVTWidth);
       // Restore index back to be relative to the original widen element type.
       Idx = Idx * NewVTWidth / ValEltWidth;
@@ -4053,10 +4028,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
                                       MMOFlags, AAInfo));
   unsigned Offset = Increment;
   for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
-    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                     BasePtr,
-                                     DAG.getConstant(Offset, dl,
-                                                     BasePtr.getValueType()));
+    SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
     SDValue EOp = DAG.getNode(
         ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index a21b4c733254..379f0dcef513 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -22,12 +22,12 @@
 #include "llvm/CodeGen/ResourcePriorityQueue.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 237d541b4cb9..cf92907a8b5f 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -20,32 +20,31 @@
 
 namespace llvm {
 
-class MDNode;
+class DIVariable;
+class DIExpression;
 class SDNode;
 class Value;
 
-/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// Holds the information from a dbg_value node through SDISel.
 /// We do not use SDValue here to avoid including its header.
-
 class SDDbgValue {
 public:
   enum DbgValueKind {
-    SDNODE = 0,             // value is the result of an expression
-    CONST = 1,              // value is a constant
-    FRAMEIX = 2             // value is contents of a stack location
+    SDNODE = 0,             ///< Value is the result of an expression.
+    CONST = 1,              ///< Value is a constant.
+    FRAMEIX = 2             ///< Value is contents of a stack location.
   };
 private:
   union {
     struct {
-      SDNode *Node;         // valid for expressions
-      unsigned ResNo;       // valid for expressions
+      SDNode *Node;         ///< Valid for expressions.
+      unsigned ResNo;       ///< Valid for expressions.
     } s;
-    const Value *Const;     // valid for constants
-    unsigned FrameIx;       // valid for stack objects
+    const Value *Const;     ///< Valid for constants.
+    unsigned FrameIx;       ///< Valid for stack objects.
   } u;
-  MDNode *Var;
-  MDNode *Expr;
-  uint64_t Offset;
+  DIVariable *Var;
+  DIExpression *Expr;
   DebugLoc DL;
   unsigned Order;
   enum DbgValueKind kind;
@@ -53,71 +52,65 @@ private:
   bool Invalid = false;
 
 public:
-  // Constructor for non-constants.
-  SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
-             uint64_t off, DebugLoc dl, unsigned O)
-      : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
-        IsIndirect(indir) {
+  /// Constructor for non-constants.
+  SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R,
+             bool indir, DebugLoc dl, unsigned O)
+      : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) {
     kind = SDNODE;
     u.s.Node = N;
     u.s.ResNo = R;
   }
 
-  // Constructor for constants.
-  SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
-             DebugLoc dl, unsigned O)
-      : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
-        IsIndirect(false) {
+  /// Constructor for constants.
+  SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl,
+             unsigned O)
+      : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
     kind = CONST;
     u.Const = C;
   }
 
-  // Constructor for frame indices.
-  SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
+  /// Constructor for frame indices.
+  SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl,
              unsigned O)
-      : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
-        IsIndirect(false) {
+      : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
     kind = FRAMEIX;
     u.FrameIx = FI;
   }
 
-  // Returns the kind.
+  /// Returns the kind.
   DbgValueKind getKind() const { return kind; }
 
-  // Returns the MDNode pointer for the variable.
-  MDNode *getVariable() const { return Var; }
+  /// Returns the DIVariable pointer for the variable.
+  DIVariable *getVariable() const { return Var; }
 
-  // Returns the MDNode pointer for the expression.
-  MDNode *getExpression() const { return Expr; }
+  /// Returns the DIExpression pointer for the expression.
+  DIExpression *getExpression() const { return Expr; }
 
-  // Returns the SDNode* for a register ref
+  /// Returns the SDNode* for a register ref
   SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
 
-  // Returns the ResNo for a register ref
+  /// Returns the ResNo for a register ref
   unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
 
-  // Returns the Value* for a constant
+  /// Returns the Value* for a constant
   const Value *getConst() const { assert (kind==CONST); return u.Const; }
 
-  // Returns the FrameIx for a stack object
+  /// Returns the FrameIx for a stack object
   unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
 
-  // Returns whether this is an indirect value.
+  /// Returns whether this is an indirect value.
   bool isIndirect() const { return IsIndirect; }
 
-  // Returns the offset.
-  uint64_t getOffset() const { return Offset; }
-
-  // Returns the DebugLoc.
+  /// Returns the DebugLoc.
   DebugLoc getDebugLoc() const { return DL; }
 
-  // Returns the SDNodeOrder.  This is the order of the preceding node in the
-  // input.
+  /// Returns the SDNodeOrder.  This is the order of the preceding node in the
+  /// input.
   unsigned getOrder() const { return Order; }
 
-  // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
-  // property. A SDDbgValue is invalid if the SDNode that produces the value is
-  // deleted.
+  /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+  /// property. A SDDbgValue is invalid if the SDNode that produces the value is
+  /// deleted.
   void setIsInvalidated() { Invalid = true; }
   bool isInvalidated() const { return Invalid; }
 };
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 137994093277..698e14453d1d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -18,13 +18,13 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "pre-RA-sched"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 70b1fa77a099..49f304c8cc86 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,4 +1,4 @@
-//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,23 +16,47 @@
 //===----------------------------------------------------------------------===//
 
 #include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "pre-RA-sched"
@@ -46,6 +70,7 @@ static RegisterScheduler
   burrListDAGScheduler("list-burr",
                        "Bottom-up register reduction list scheduling",
                        createBURRListDAGScheduler);
+
 static RegisterScheduler
   sourceListDAGScheduler("source",
                          "Similar to list-burr but schedules in source "
@@ -105,6 +130,7 @@ static cl::opt<unsigned> AvgIPC(
   cl::desc("Average inst/cycle whan no target itinerary exists."));
 
 namespace {
+
 //===----------------------------------------------------------------------===//
 /// ScheduleDAGRRList - The actual register reduction list scheduler
 /// implementation.  This supports both top-down and bottom-up scheduling.
@@ -112,7 +138,6 @@ namespace {
 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
 private:
   /// NeedLatency - True if the scheduler will make use of latency information.
-  ///
   bool NeedLatency;
 
   /// AvailableQueue - The priority queue to use for the available SUnits.
@@ -122,13 +147,13 @@ private:
   /// been issued, but their results are not ready yet (due to the latency of
   /// the operation).  Once the operands becomes available, the instruction is
   /// added to the AvailableQueue.
-  std::vector<SUnit*> PendingQueue;
+  std::vector<SUnit *> PendingQueue;
 
   /// HazardRec - The hazard recognizer to use.
   ScheduleHazardRecognizer *HazardRec;
 
   /// CurCycle - The current scheduler state corresponds to this cycle.
-  unsigned CurCycle;
+  unsigned CurCycle = 0;
 
   /// MinAvailableCycle - Cycle of the soonest available instruction.
   unsigned MinAvailableCycle;
@@ -147,7 +172,9 @@ private:
   // Collect interferences between physical register use/defs.
   // Each interference is an SUnit and set of physical registers.
   SmallVector<SUnit*, 4> Interferences;
-  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+
+  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
+
   LRegsMapT LRegsMap;
 
   /// Topo - A topological ordering for SUnits which permits fast IsReachable
@@ -163,9 +190,8 @@ public:
                     SchedulingPriorityQueue *availqueue,
                     CodeGenOpt::Level OptLevel)
     : ScheduleDAGSDNodes(mf),
-      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+      NeedLatency(needlatency), AvailableQueue(availqueue),
       Topo(SUnits, nullptr) {
-
     const TargetSubtargetInfo &STI = mf.getSubtarget();
     if (DisableSchedCycles || !NeedLatency)
       HazardRec = new ScheduleHazardRecognizer();
@@ -267,6 +293,7 @@ private:
     return !NeedLatency;
   }
 };
+
 }  // end anonymous namespace
 
 /// GetCostForDef - Looks up the register class and cost for a given definition.
@@ -319,13 +346,13 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
-  DEBUG(dbgs()
-        << "********** List Scheduling BB#" << BB->getNumber()
-        << " '" << BB->getName() << "' **********\n");
+  DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+               << " '" << BB->getName() << "' **********\n");
 
   CurCycle = 0;
   IssueCount = 0;
-  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+  MinAvailableCycle =
+      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
   NumLiveRegs = 0;
   // Allocate slots for each physical register, plus one for a special register
   // to track the virtual resource of a calling sequence.
@@ -409,7 +436,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                              unsigned NestLevel,
                              const TargetInstrInfo *TII) {
   SDNode *N = Outer;
-  for (;;) {
+  while (true) {
     if (N == Inner)
       return true;
     // For a TokenFactor, examine each operand. There may be multiple ways
@@ -456,7 +483,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
 static SDNode *
 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                  const TargetInstrInfo *TII) {
-  for (;;) {
+  while (true) {
     // For a TokenFactor, examine each operand. There may be multiple ways
     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     // most nesting in order to ensure that we find the corresponding match.
@@ -550,6 +577,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
         unsigned NestLevel = 0;
         unsigned MaxNest = 0;
         SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+        assert(N && "Must find call sequence start");
 
         SUnit *Def = &SUnits[N->getNodeId()];
         CallSeqEndForStart[Def] = SU;
@@ -571,7 +599,7 @@ void ScheduleDAGRRList::ReleasePending() {
 
   // If the available queue is empty, it is safe to reset MinAvailableCycle.
   if (AvailableQueue->empty())
-    MinAvailableCycle = UINT_MAX;
+    MinAvailableCycle = std::numeric_limits<unsigned>::max();
 
   // Check to see if any of the pending instructions are ready to issue.  If
   // so, add them to the available queue.
@@ -791,7 +819,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
       AvailableQueue->remove(PredSU);
   }
 
-  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+         "NumSuccsLeft will overflow!");
   ++PredSU->NumSuccsLeft;
 }
 
@@ -821,9 +850,13 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
        SUNode = SUNode->getGluedNode()) {
     if (SUNode->isMachineOpcode() &&
         SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+      SUnit *SeqEnd = CallSeqEndForStart[SU];
+      assert(SeqEnd && "Call sequence start/end must be known");
+      assert(!LiveRegDefs[CallResource]);
+      assert(!LiveRegGens[CallResource]);
       ++NumLiveRegs;
       LiveRegDefs[CallResource] = SU;
-      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+      LiveRegGens[CallResource] = SeqEnd;
     }
   }
 
@@ -835,6 +868,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
       if (SUNode->isMachineOpcode() &&
           SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        assert(LiveRegDefs[CallResource]);
+        assert(LiveRegGens[CallResource]);
         --NumLiveRegs;
         LiveRegDefs[CallResource] = nullptr;
         LiveRegGens[CallResource] = nullptr;
@@ -891,7 +926,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
   if (LookAhead == 0)
     return;
 
-  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
   unsigned HazardCycle = (*I)->getHeight();
   for (auto E = Sequence.end(); I != E; ++I) {
     SUnit *SU = *I;
@@ -1319,8 +1354,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
     // If we're in the middle of scheduling a call, don't begin scheduling
     // another call. Also, don't allow any physical registers to be live across
     // the call.
-    if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
-        (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
       // Check the special calling-sequence resource.
       unsigned CallResource = TRI->getNumRegs();
       if (LiveRegDefs[CallResource]) {
@@ -1390,27 +1424,32 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
 /// (3) No Interferences: may unschedule to break register interferences.
 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
   SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
-  while (CurSU) {
-    SmallVector<unsigned, 4> LRegs;
-    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
-      break;
-    DEBUG(dbgs() << "    Interfering reg " <<
-          (LRegs[0] == TRI->getNumRegs() ? "CallResource"
-           : TRI->getName(LRegs[0]))
-           << " SU #" << CurSU->NodeNum << '\n');
-    std::pair<LRegsMapT::iterator, bool> LRegsPair =
-      LRegsMap.insert(std::make_pair(CurSU, LRegs));
-    if (LRegsPair.second) {
-      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
-      Interferences.push_back(CurSU);
-    }
-    else {
-      assert(CurSU->isPending && "Interferences are pending");
-      // Update the interference with current live regs.
-      LRegsPair.first->second = LRegs;
+  auto FindAvailableNode = [&]() {
+    while (CurSU) {
+      SmallVector<unsigned, 4> LRegs;
+      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+        break;
+      DEBUG(dbgs() << "    Interfering reg ";
+            if (LRegs[0] == TRI->getNumRegs())
+              dbgs() << "CallResource";
+            else
+              dbgs() << printReg(LRegs[0], TRI);
+            dbgs() << " SU #" << CurSU->NodeNum << '\n');
+      std::pair<LRegsMapT::iterator, bool> LRegsPair =
+        LRegsMap.insert(std::make_pair(CurSU, LRegs));
+      if (LRegsPair.second) {
+        CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+        Interferences.push_back(CurSU);
+      }
+      else {
+        assert(CurSU->isPending && "Interferences are pending");
+        // Update the interference with current live regs.
+        LRegsPair.first->second = LRegs;
+      }
+      CurSU = AvailableQueue->pop();
     }
-    CurSU = AvailableQueue->pop();
-  }
+  };
+  FindAvailableNode();
   if (CurSU)
     return CurSU;
 
@@ -1423,7 +1462,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
     // Try unscheduling up to the point where it's safe to schedule
     // this node.
     SUnit *BtSU = nullptr;
-    unsigned LiveCycle = UINT_MAX;
+    unsigned LiveCycle = std::numeric_limits<unsigned>::max();
     for (unsigned Reg : LRegs) {
       if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
         BtSU = LiveRegGens[Reg];
@@ -1447,13 +1486,16 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
 
       // If one or more successors has been unscheduled, then the current
       // node is no longer available.
-      if (!TrySU->isAvailable || !TrySU->NodeQueueId)
+      if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
+        DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
         CurSU = AvailableQueue->pop();
-      else {
+      } else {
+        DEBUG(dbgs() << "TrySU available\n");
         // Available and in AvailableQueue
         AvailableQueue->remove(TrySU);
         CurSU = TrySU;
       }
+      FindAvailableNode();
       // Interferences has been mutated. We must break.
       break;
     }
@@ -1540,7 +1582,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
 
     while (AvailableQueue->empty() && !PendingQueue.empty()) {
       // Advance the cycle to free resources. Skip ahead to the next ready SU.
-      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+      assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
+             "MinAvailableCycle uninitialized");
       AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
     }
   }
@@ -1553,17 +1596,11 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
 #endif
 }
 
-//===----------------------------------------------------------------------===//
-//                RegReductionPriorityQueue Definition
-//===----------------------------------------------------------------------===//
-//
-// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
-// to reduce register pressure.
-//
 namespace {
+
 class RegReductionPQBase;
 
-struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+struct queue_sort {
   bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
 };
 
@@ -1571,6 +1608,7 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
 template<class SF>
 struct reverse_sort : public queue_sort {
   SF &SortFunc;
+
   reverse_sort(SF &sf) : SortFunc(sf) {}
 
   bool operator()(SUnit* left, SUnit* right) const {
@@ -1590,6 +1628,7 @@ struct bu_ls_rr_sort : public queue_sort {
   };
 
   RegReductionPQBase *SPQ;
+
   bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
 
   bool operator()(SUnit* left, SUnit* right) const;
@@ -1603,8 +1642,8 @@ struct src_ls_rr_sort : public queue_sort {
   };
 
   RegReductionPQBase *SPQ;
-  src_ls_rr_sort(RegReductionPQBase *spq)
-    : SPQ(spq) {}
+
+  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
 
   bool operator()(SUnit* left, SUnit* right) const;
 };
@@ -1617,8 +1656,8 @@ struct hybrid_ls_rr_sort : public queue_sort {
   };
 
   RegReductionPQBase *SPQ;
-  hybrid_ls_rr_sort(RegReductionPQBase *spq)
-    : SPQ(spq) {}
+
+  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
 
   bool isReady(SUnit *SU, unsigned CurCycle) const;
 
@@ -1634,8 +1673,8 @@ struct ilp_ls_rr_sort : public queue_sort {
   };
 
   RegReductionPQBase *SPQ;
-  ilp_ls_rr_sort(RegReductionPQBase *spq)
-    : SPQ(spq) {}
+
+  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
 
   bool isReady(SUnit *SU, unsigned CurCycle) const;
 
@@ -1644,8 +1683,8 @@ struct ilp_ls_rr_sort : public queue_sort {
 
 class RegReductionPQBase : public SchedulingPriorityQueue {
 protected:
-  std::vector<SUnit*> Queue;
-  unsigned CurQueueId;
+  std::vector<SUnit *> Queue;
+  unsigned CurQueueId = 0;
   bool TracksRegPressure;
   bool SrcOrder;
 
@@ -1656,13 +1695,12 @@ protected:
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
   const TargetLowering *TLI;
-  ScheduleDAGRRList *scheduleDAG;
+  ScheduleDAGRRList *scheduleDAG = nullptr;
 
   // SethiUllmanNumbers - The SethiUllman number for each node.
   std::vector<unsigned> SethiUllmanNumbers;
 
   /// RegPressure - Tracking current reg pressure per register class.
-  ///
   std::vector<unsigned> RegPressure;
 
   /// RegLimit - Tracking the number of allocatable registers per register
@@ -1677,9 +1715,8 @@ public:
                      const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri,
                      const TargetLowering *tli)
-    : SchedulingPriorityQueue(hasReadyFilter),
-      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
-      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
+    : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
+      SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
     if (TracksRegPressure) {
       unsigned NumRC = TRI->getNumRegClasses();
       RegLimit.resize(NumRC);
@@ -1730,7 +1767,7 @@ public:
   void remove(SUnit *SU) override {
     assert(!Queue.empty() && "Queue is empty!");
     assert(SU->NodeQueueId != 0 && "Not in queue!");
-    std::vector<SUnit *>::iterator I = find(Queue, SU);
+    std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
     if (I != std::prev(Queue.end()))
       std::swap(*I, Queue.back());
     Queue.pop_back();
@@ -1759,7 +1796,7 @@ protected:
 };
 
 template<class SF>
-static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
   std::vector<SUnit *>::iterator Best = Q.begin();
   for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
     if (Picker(*Best, *I))
@@ -1772,7 +1809,7 @@ static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
 }
 
 template<class SF>
-SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
 #ifndef NDEBUG
   if (DAG->StressSched) {
     reverse_sort<SF> RPicker(Picker);
@@ -1783,6 +1820,13 @@ SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
   return popFromQueueImpl(Q, Picker);
 }
 
+//===----------------------------------------------------------------------===//
+//                RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
 template<class SF>
 class RegReductionPriorityQueue : public RegReductionPQBase {
   SF Picker;
@@ -1815,7 +1859,7 @@ public:
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
     // Emulate pop() without clobbering NodeQueueIds.
-    std::vector<SUnit*> DumpQueue = Queue;
+    std::vector<SUnit *> DumpQueue = Queue;
     SF DumpPicker = Picker;
     while (!DumpQueue.empty()) {
       SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
@@ -1826,17 +1870,11 @@ public:
 #endif
 };
 
-typedef RegReductionPriorityQueue<bu_ls_rr_sort>
-BURegReductionPriorityQueue;
-
-typedef RegReductionPriorityQueue<src_ls_rr_sort>
-SrcRegReductionPriorityQueue;
+using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
+using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
+using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
+using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
 
-typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
-HybridBURRPriorityQueue;
-
-typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
-ILPBURRPriorityQueue;
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -2855,7 +2893,6 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
 /// This results in the store being scheduled immediately
 /// after N, which shortens the U->N live range, reducing
 /// register pressure.
-///
 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
   // Visit all the nodes in topological order, working top-down.
   for (SUnit &SU : *SUnits) {
@@ -3022,7 +3059,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
 
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                  CodeGenOpt::Level OptLevel) {
   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3036,7 +3073,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
   return SD;
 }
 
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                    CodeGenOpt::Level OptLevel) {
   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3050,7 +3087,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
   return SD;
 }
 
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                    CodeGenOpt::Level OptLevel) {
   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3066,7 +3103,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
   return SD;
 }
 
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3c8526ebb702..c09b47af26a6 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -23,14 +23,14 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "pre-RA-sched"
@@ -709,18 +709,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
   // source order number as N.
   MachineBasicBlock *BB = Emitter.getBlock();
   MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
-  ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
-  for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
-    if (DVs[i]->isInvalidated())
+  for (auto DV : DAG->GetDbgValues(N)) {
+    if (DV->isInvalidated())
       continue;
-    unsigned DVOrder = DVs[i]->getOrder();
+    unsigned DVOrder = DV->getOrder();
     if (!Order || DVOrder == Order) {
-      MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
       if (DbgMI) {
-        Orders.push_back(std::make_pair(DVOrder, DbgMI));
+        Orders.push_back({DVOrder, DbgMI});
         BB->insert(InsertPos, DbgMI);
       }
-      DVs[i]->setIsInvalidated();
+      DV->setIsInvalidated();
     }
   }
 }
@@ -742,16 +741,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
   }
 
   MachineBasicBlock *BB = Emitter.getBlock();
-  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+  auto IP = Emitter.getInsertPos();
+  if (IP == BB->begin() || BB->back().isPHI() ||
       // Fast-isel may have inserted some instructions, in which case the
       // BB->back().isPHI() test will not fire when we want it to.
-      std::prev(Emitter.getInsertPos())->isPHI()) {
+      std::prev(IP)->isPHI()) {
     // Did not insert any instruction.
-    Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
+    Orders.push_back({Order, (MachineInstr *)nullptr});
     return;
   }
 
-  Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos())));
+  Orders.push_back({Order, &*std::prev(IP)});
   ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
 }
 
@@ -856,8 +856,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
     MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
 
     // Sort the source order instructions and use the order to insert debug
-    // values.
-    std::sort(Orders.begin(), Orders.end(), less_first());
+    // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
+    // regardless of the host's implementation fo std::sort.
+    std::stable_sort(Orders.begin(), Orders.end(), less_first());
+    std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+                     [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+                       return LHS->getOrder() < RHS->getOrder();
+                     });
 
     SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
     SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
@@ -869,10 +874,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
       // Insert all SDDbgValue's whose order(s) are before "Order".
       if (!MI)
         continue;
-      for (; DI != DE &&
-             (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+      for (; DI != DE; ++DI) {
+        if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+          break;
         if ((*DI)->isInvalidated())
           continue;
+
         MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
         if (DbgMI) {
           if (!LastOrder)
@@ -891,11 +898,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
     // Add trailing DbgValue's before the terminator. FIXME: May want to add
     // some of them before one or more conditional branches?
     SmallVector<MachineInstr*, 8> DbgMIs;
-    while (DI != DE) {
-      if (!(*DI)->isInvalidated())
-        if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
-          DbgMIs.push_back(DbgMI);
-      ++DI;
+    for (; DI != DE; ++DI) {
+      if ((*DI)->isInvalidated())
+        continue;
+      assert((*DI)->getOrder() >= LastOrder &&
+             "emitting DBG_VALUE out of order");
+      if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+        DbgMIs.push_back(DbgMI);
     }
 
     MachineBasicBlock *InsertBB = Emitter.getBlock();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 631cb34717c4..07b46b9183ab 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -25,13 +25,13 @@
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <climits>
 using namespace llvm;
 
@@ -93,9 +93,8 @@ private:
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGVLIW::Schedule() {
-  DEBUG(dbgs()
-        << "********** List Scheduling BB#" << BB->getNumber()
-        << " '" << BB->getName() << "' **********\n");
+  DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+               << " '" << BB->getName() << "' **********\n");
 
   // Build the scheduling graph.
   BuildSchedGraph(AA);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 16f425dc7969..12a21e74079e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -37,6 +37,9 @@
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -59,11 +62,8 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -87,6 +87,15 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
 void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
 void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
 
+#define DEBUG_TYPE "selectiondag"
+
+static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
+  DEBUG(
+    dbgs() << Msg;
+    V.getNode()->dump(G);
+  );
+}
+
 //===----------------------------------------------------------------------===//
 //                              ConstantFPSDNode Class
 //===----------------------------------------------------------------------===//
@@ -116,8 +125,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
 //                              ISD Namespace
 //===----------------------------------------------------------------------===//
 
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
-                                bool AllowShrink) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
   auto *BV = dyn_cast<BuildVectorSDNode>(N);
   if (!BV)
     return false;
@@ -126,10 +134,9 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
   unsigned SplatBitSize;
   bool HasUndefs;
   unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
-  unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
   return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
-                             MinSplatBits) &&
-         EltSize >= SplatBitSize;
+                             EltSize) &&
+         EltSize == SplatBitSize;
 }
 
 // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
@@ -895,12 +902,14 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
 }
 
 void SelectionDAG::init(MachineFunction &NewMF,
-                        OptimizationRemarkEmitter &NewORE) {
+                        OptimizationRemarkEmitter &NewORE,
+                        Pass *PassPtr) {
   MF = &NewMF;
+  SDAGISelPass = PassPtr;
   ORE = &NewORE;
   TLI = getSubtarget().getTargetLowering();
   TSI = getSubtarget().getSelectionDAGInfo();
-  Context = &MF->getFunction()->getContext();
+  Context = &MF->getFunction().getContext();
 }
 
 SelectionDAG::~SelectionDAG() {
@@ -1018,7 +1027,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
   assert(!VT.isVector() &&
          "getZeroExtendInReg should use the vector element type instead of "
          "the vector type!");
-  if (Op.getValueType() == VT) return Op;
+  if (Op.getValueType().getScalarType() == VT) return Op;
   unsigned BitWidth = Op.getScalarValueSizeInBits();
   APInt Imm = APInt::getLowBitsSet(BitWidth,
                                    VT.getSizeInBits());
@@ -1156,7 +1165,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
       Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
-    return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+
+    SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+    return V;
   }
 
   assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
@@ -1176,11 +1187,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
     N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
     CSEMap.InsertNode(N, IP);
     InsertNode(N);
+    NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
   }
 
   SDValue Result(N, 0);
   if (VT.isVector())
     Result = getSplatBuildVector(VT, DL, Result);
+
   return Result;
 }
 
@@ -1222,6 +1235,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
   SDValue Result(N, 0);
   if (VT.isVector())
     Result = getSplatBuildVector(VT, DL, Result);
+  NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
   return Result;
 }
 
@@ -1317,7 +1331,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
   assert((TargetFlags == 0 || isTarget) &&
          "Cannot set target flags on target-independent globals");
   if (Alignment == 0)
-    Alignment = MF->getFunction()->optForSize()
+    Alignment = MF->getFunction().optForSize()
                     ? getDataLayout().getABITypeAlignment(C->getType())
                     : getDataLayout().getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1471,7 +1485,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
   // Validate that all indices in Mask are within the range of the elements
   // input to the shuffle.
   int NElts = Mask.size();
-  assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+  assert(llvm::all_of(Mask,
+                      [&](int M) { return M < (NElts * 2) && M >= -1; }) &&
          "Index out of range");
 
   // Copy the mask so we can do any needed cleanup.
@@ -1622,7 +1637,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
@@ -1665,15 +1682,20 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
 
 SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
                                  MCSymbol *Label) {
+  return getLabelNode(ISD::EH_LABEL, dl, Root, Label);
+}
+
+SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
+                                   SDValue Root, MCSymbol *Label) {
   FoldingSetNodeID ID;
   SDValue Ops[] = { Root };
-  AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
+  AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops);
   ID.AddPointer(Label);
   void *IP = nullptr;
   if (SDNode *E = FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
 
-  auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+  auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
   createOperands(N, Ops);
 
   CSEMap.InsertNode(N, IP);
@@ -1955,6 +1977,69 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
   return SDValue();
 }
 
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+  switch (V.getOpcode()) {
+  default:
+    break;
+  case ISD::Constant: {
+    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+    assert(CV && "Const value should be ConstSDNode.");
+    const APInt &CVal = CV->getAPIntValue();
+    APInt NewVal = CVal & Mask;
+    if (NewVal != CVal)
+      return getConstant(NewVal, SDLoc(V), V.getValueType());
+    break;
+  }
+  case ISD::OR:
+  case ISD::XOR:
+    // If the LHS or RHS don't contribute bits to the or, drop them.
+    if (MaskedValueIsZero(V.getOperand(0), Mask))
+      return V.getOperand(1);
+    if (MaskedValueIsZero(V.getOperand(1), Mask))
+      return V.getOperand(0);
+    break;
+  case ISD::SRL:
+    // Only look at single-use SRLs.
+    if (!V.getNode()->hasOneUse())
+      break;
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+      // See if we can recursively simplify the LHS.
+      unsigned Amt = RHSC->getZExtValue();
+
+      // Watch out for shift count overflow though.
+      if (Amt >= Mask.getBitWidth())
+        break;
+      APInt NewMask = Mask << Amt;
+      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+        return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
+                       V.getOperand(1));
+    }
+    break;
+  case ISD::AND: {
+    // X & -1 -> X (ignoring bits which aren't demanded).
+    ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+    if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue()))
+      return V.getOperand(0);
+    break;
+  }
+  case ISD::ANY_EXTEND: {
+    SDValue Src = V.getOperand(0);
+    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+    // Being conservative here - only peek through if we only demand bits in the
+    // non-extended source (even though the extended bits are technically undef).
+    if (Mask.getActiveBits() > SrcBitWidth)
+      break;
+    APInt SrcMask = Mask.trunc(SrcBitWidth);
+    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+      return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
+    break;
+  }
+  }
+  return SDValue();
+}
+
 /// SignBitIsZero - Return true if the sign bit of Op is known to be zero.  We
 /// use this predicate to simplify operations downstream.
 bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
@@ -1972,6 +2057,30 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
   return Mask.isSubsetOf(Known.Zero);
 }
 
+/// Helper function that checks to see if a node is a constant or a
+/// build vector of splat constants at least within the demanded elts.
+static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N,
+                                                   const APInt &DemandedElts) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+    return CN;
+  if (N.getOpcode() != ISD::BUILD_VECTOR)
+    return nullptr;
+  EVT VT = N.getValueType();
+  ConstantSDNode *Cst = nullptr;
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size");
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (!DemandedElts[i])
+      continue;
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i));
+    if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) ||
+        C->getValueType(0) != VT.getScalarType())
+      return nullptr;
+    Cst = C;
+  }
+  return Cst;
+}
+
 /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
 /// is less than the element bit-width of the shift node, return it.
 static const APInt *getValidShiftAmountConstant(SDValue V) {
@@ -2005,6 +2114,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
   unsigned BitWidth = Op.getScalarValueSizeInBits();
 
   Known = KnownBits(BitWidth);   // Don't know anything.
+
+  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+    // We know all of the bits for a constant!
+    Known.One = C->getAPIntValue();
+    Known.Zero = ~Known.One;
+    return;
+  }
+  if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
+    // We know all of the bits for a constant fp!
+    Known.One = C->getValueAPF().bitcastToAPInt();
+    Known.Zero = ~Known.One;
+    return;
+  }
+
   if (Depth == 6)
     return;  // Limit search depth.
 
@@ -2016,11 +2139,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
 
   unsigned Opcode = Op.getOpcode();
   switch (Opcode) {
-  case ISD::Constant:
-    // We know all of the bits for a constant!
-    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
-    Known.Zero = ~Known.One;
-    break;
   case ISD::BUILD_VECTOR:
     // Collect the known bits that are shared by every demanded vector element.
     assert(NumElts == Op.getValueType().getVectorNumElements() &&
@@ -2045,7 +2163,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       Known.Zero &= Known2.Zero;
 
       // If we don't know any bits, early out.
-      if (!Known.One && !Known.Zero)
+      if (Known.isUnknown())
         break;
     }
     break;
@@ -2083,7 +2201,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       Known.Zero &= Known2.Zero;
     }
     // If we don't know any bits, early out.
-    if (!Known.One && !Known.Zero)
+    if (Known.isUnknown())
       break;
     if (!!DemandedRHS) {
       SDValue RHS = Op.getOperand(1);
@@ -2109,11 +2227,45 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
         Known.Zero &= Known2.Zero;
       }
       // If we don't know any bits, early out.
-      if (!Known.One && !Known.Zero)
+      if (Known.isUnknown())
         break;
     }
     break;
   }
+  case ISD::INSERT_SUBVECTOR: {
+    // If we know the element index, demand any elements from the subvector and
+    // the remainder from the src its inserted into, otherwise demand them all.
+    SDValue Src = Op.getOperand(0);
+    SDValue Sub = Op.getOperand(1);
+    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+    if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+      Known.One.setAllBits();
+      Known.Zero.setAllBits();
+      uint64_t Idx = SubIdx->getZExtValue();
+      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+      if (!!DemandedSubElts) {
+        computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+        if (Known.isUnknown())
+          break; // early-out.
+      }
+      APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+      APInt DemandedSrcElts = DemandedElts & ~SubMask;
+      if (!!DemandedSrcElts) {
+        computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+        Known.One &= Known2.One;
+        Known.Zero &= Known2.Zero;
+      }
+    } else {
+      computeKnownBits(Sub, Known, Depth + 1);
+      if (Known.isUnknown())
+        break; // early-out.
+      computeKnownBits(Src, Known2, Depth + 1);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
+    }
+    break;
+  }
   case ISD::EXTRACT_SUBVECTOR: {
     // If we know the element index, just demand that subvector elements,
     // otherwise demand them all.
@@ -2132,10 +2284,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
   }
   case ISD::BITCAST: {
     SDValue N0 = Op.getOperand(0);
-    unsigned SubBitWidth = N0.getScalarValueSizeInBits();
+    EVT SubVT = N0.getValueType();
+    unsigned SubBitWidth = SubVT.getScalarSizeInBits();
 
-    // Ignore bitcasts from floating point.
-    if (!N0.getValueType().isInteger())
+    // Ignore bitcasts from unsupported types.
+    if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
       break;
 
     // Fast handling of 'identity' bitcasts.
@@ -2193,7 +2346,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
           Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
           Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
           // If we don't know any bits, early out.
-          if (!Known.One && !Known.Zero)
+          if (Known.isUnknown())
             break;
         }
     }
@@ -2264,22 +2417,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
     break;
   }
   case ISD::SELECT:
-    computeKnownBits(Op.getOperand(2), Known, Depth+1);
+  case ISD::VSELECT:
+    computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
     // If we don't know any bits, early out.
-    if (!Known.One && !Known.Zero)
+    if (Known.isUnknown())
       break;
-    computeKnownBits(Op.getOperand(1), Known2, Depth+1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
 
     // Only known if known in both the LHS and RHS.
     Known.One &= Known2.One;
     Known.Zero &= Known2.Zero;
     break;
   case ISD::SELECT_CC:
-    computeKnownBits(Op.getOperand(3), Known, Depth+1);
+    computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
     // If we don't know any bits, early out.
-    if (!Known.One && !Known.Zero)
+    if (Known.isUnknown())
       break;
-    computeKnownBits(Op.getOperand(2), Known2, Depth+1);
+    computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
 
     // Only known if known in both the LHS and RHS.
     Known.One &= Known2.One;
@@ -2308,35 +2462,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
   case ISD::SHL:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
       computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-      Known.Zero <<= *ShAmt;
-      Known.One <<= *ShAmt;
+      unsigned Shift = ShAmt->getZExtValue();
+      Known.Zero <<= Shift;
+      Known.One <<= Shift;
       // Low bits are known zero.
-      Known.Zero.setLowBits(ShAmt->getZExtValue());
+      Known.Zero.setLowBits(Shift);
     }
     break;
   case ISD::SRL:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
       computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-      Known.Zero.lshrInPlace(*ShAmt);
-      Known.One.lshrInPlace(*ShAmt);
+      unsigned Shift = ShAmt->getZExtValue();
+      Known.Zero.lshrInPlace(Shift);
+      Known.One.lshrInPlace(Shift);
       // High bits are known zero.
-      Known.Zero.setHighBits(ShAmt->getZExtValue());
+      Known.Zero.setHighBits(Shift);
+    } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
+      // If the shift amount is a vector of constants see if we can bound
+      // the number of upper zero bits.
+      unsigned ShiftAmountMin = BitWidth;
+      for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
+        if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
+          const APInt &ShAmt = C->getAPIntValue();
+          if (ShAmt.ult(BitWidth)) {
+            ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
+                                                ShAmt.getZExtValue());
+            continue;
+          }
+        }
+        // Don't know anything.
+        ShiftAmountMin = 0;
+        break;
+      }
+
+      Known.Zero.setHighBits(ShiftAmountMin);
     }
     break;
   case ISD::SRA:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
       computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-      Known.Zero.lshrInPlace(*ShAmt);
-      Known.One.lshrInPlace(*ShAmt);
-      // If we know the value of the sign bit, then we know it is copied across
-      // the high bits by the shift amount.
-      APInt SignMask = APInt::getSignMask(BitWidth);
-      SignMask.lshrInPlace(*ShAmt);  // Adjust to where it is now in the mask.
-      if (Known.Zero.intersects(SignMask)) {
-        Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
-      } else if (Known.One.intersects(SignMask)) {
-        Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
-      }
+      unsigned Shift = ShAmt->getZExtValue();
+      // Sign extend known zero/one bit (else is unknown).
+      Known.Zero.ashrInPlace(Shift);
+      Known.One.ashrInPlace(Shift);
     }
     break;
   case ISD::SIGN_EXTEND_INREG: {
@@ -2414,49 +2582,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
   }
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getScalarSizeInBits();
-    Known = Known.trunc(InBits);
-    computeKnownBits(Op.getOperand(0), Known,
-                     DemandedElts.zext(InVT.getVectorNumElements()),
-                     Depth + 1);
+    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+    computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
     Known = Known.zext(BitWidth);
-    Known.Zero.setBitsFrom(InBits);
+    Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
     break;
   }
   case ISD::ZERO_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getScalarSizeInBits();
-    Known = Known.trunc(InBits);
     computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
     Known = Known.zext(BitWidth);
-    Known.Zero.setBitsFrom(InBits);
+    Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
     break;
   }
   // TODO ISD::SIGN_EXTEND_VECTOR_INREG
   case ISD::SIGN_EXTEND: {
-    EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getScalarSizeInBits();
-
-    Known = Known.trunc(InBits);
     computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-
     // If the sign bit is known to be zero or one, then sext will extend
     // it to the top bits, else it will just zext.
     Known = Known.sext(BitWidth);
     break;
   }
   case ISD::ANY_EXTEND: {
-    EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getScalarSizeInBits();
-    Known = Known.trunc(InBits);
     computeKnownBits(Op.getOperand(0), Known, Depth+1);
     Known = Known.zext(BitWidth);
     break;
   }
   case ISD::TRUNCATE: {
-    EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getScalarSizeInBits();
-    Known = Known.zext(InBits);
     computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
     Known = Known.trunc(BitWidth);
     break;
@@ -2755,7 +2907,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
     computeKnownBits(Op.getOperand(0), Known, DemandedElts,
                      Depth + 1);
     // If we don't know any bits, early out.
-    if (!Known.One && !Known.Zero)
+    if (Known.isUnknown())
       break;
     computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
     Known.Zero &= Known2.Zero;
@@ -2764,11 +2916,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
   }
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
-    if (unsigned Align = InferPtrAlignment(Op)) {
-      // The low bits are known zero if the pointer is aligned.
-      Known.Zero.setLowBits(Log2_32(Align));
-      break;
-    }
+    TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
     break;
 
   default:
@@ -2783,7 +2931,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
     break;
   }
 
-  assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+  assert(!Known.hasConflict() && "Bits known to be one AND zero?");
 }
 
 SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -2873,12 +3021,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
                                           unsigned Depth) const {
   EVT VT = Op.getValueType();
-  assert(VT.isInteger() && "Invalid VT!");
+  assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
   unsigned VTBits = VT.getScalarSizeInBits();
   unsigned NumElts = DemandedElts.getBitWidth();
   unsigned Tmp, Tmp2;
   unsigned FirstAnswer = 1;
 
+  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+    const APInt &Val = C->getAPIntValue();
+    return Val.getNumSignBits();
+  }
+
   if (Depth == 6)
     return 1;  // Limit search depth.
 
@@ -2894,11 +3047,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
     return VTBits-Tmp;
 
-  case ISD::Constant: {
-    const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
-    return Val.getNumSignBits();
-  }
-
   case ISD::BUILD_VECTOR:
     Tmp = VTBits;
     for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
@@ -2952,32 +3100,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     return Tmp;
   }
 
+  case ISD::BITCAST: {
+    SDValue N0 = Op.getOperand(0);
+    EVT SrcVT = N0.getValueType();
+    unsigned SrcBits = SrcVT.getScalarSizeInBits();
+
+    // Ignore bitcasts from unsupported types..
+    if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
+      break;
+
+    // Fast handling of 'identity' bitcasts.
+    if (VTBits == SrcBits)
+      return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+
+    // Bitcast 'large element' scalar/vector to 'small element' vector.
+    // TODO: Handle cases other than 'sign splat' when we have a use case.
+    // Requires handling of DemandedElts and Endianness.
+    if ((SrcBits % VTBits) == 0) {
+      assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+      Tmp = ComputeNumSignBits(N0, Depth + 1);
+      if (Tmp == SrcBits)
+        return VTBits;
+    }
+    break;
+  }
+
   case ISD::SIGN_EXTEND:
-  case ISD::SIGN_EXTEND_VECTOR_INREG:
     Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
-    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
-
+    return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
   case ISD::SIGN_EXTEND_INREG:
     // Max of the input and what this extends.
     Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
     Tmp = VTBits-Tmp+1;
-
-    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
     return std::max(Tmp, Tmp2);
+  case ISD::SIGN_EXTEND_VECTOR_INREG: {
+    SDValue Src = Op.getOperand(0);
+    EVT SrcVT = Src.getValueType();
+    APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+    Tmp = VTBits - SrcVT.getScalarSizeInBits();
+    return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
+  }
 
   case ISD::SRA:
     Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
     // SRA X, C   -> adds C sign bits.
-    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+    if (ConstantSDNode *C =
+            isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
       APInt ShiftVal = C->getAPIntValue();
       ShiftVal += Tmp;
       Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
     }
     return Tmp;
   case ISD::SHL:
-    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+    if (ConstantSDNode *C =
+            isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
       // shl destroys sign bits.
-      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
       if (C->getAPIntValue().uge(VTBits) ||      // Bad shift.
           C->getAPIntValue().uge(Tmp)) break;    // Shifted all sign bits out.
       return Tmp - C->getZExtValue();
@@ -2987,9 +3166,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   case ISD::OR:
   case ISD::XOR:    // NOT is handled here.
     // Logical binary ops preserve the number of sign bits at the worst.
-    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
     if (Tmp != 1) {
-      Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+      Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
       FirstAnswer = std::min(Tmp, Tmp2);
       // We computed what we know about the sign bits as our first
       // answer. Now proceed to the generic code that uses
@@ -2998,15 +3177,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     break;
 
   case ISD::SELECT:
-    Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+  case ISD::VSELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
     if (Tmp == 1) return 1;  // Early out.
-    Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
     return std::min(Tmp, Tmp2);
   case ISD::SELECT_CC:
-    Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+    Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
     if (Tmp == 1) return 1;  // Early out.
-    Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+    Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1);
     return std::min(Tmp, Tmp2);
+
   case ISD::SMIN:
   case ISD::SMAX:
   case ISD::UMIN:
@@ -3041,16 +3222,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
   case ISD::ROTL:
   case ISD::ROTR:
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-      unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+      unsigned RotAmt = C->getAPIntValue().urem(VTBits);
 
       // Handle rotate right by N like a rotate left by 32-N.
       if (Op.getOpcode() == ISD::ROTR)
-        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+        RotAmt = (VTBits - RotAmt) % VTBits;
 
       // If we aren't rotating out all of the known-in sign bits, return the
       // number that are left.  This handles rotl(sext(x), 1) for example.
       Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
-      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+      if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
     }
     break;
   case ISD::ADD:
@@ -3391,7 +3572,9 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
                ? DAG.getZExtOrTrunc(Op, DL, SVT)
                : DAG.getSExtOrTrunc(Op, DL, SVT);
 
-  return DAG.getBuildVector(VT, DL, Elts);
+  SDValue V = DAG.getBuildVector(VT, DL, Elts);
+  NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
+  return V;
 }
 
 /// Gets or creates the specified node.
@@ -3407,7 +3590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
   CSEMap.InsertNode(N, IP);
 
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -3768,7 +3953,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   }
 
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
@@ -3906,18 +4093,31 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
 
   EVT SVT = VT.getScalarType();
+  EVT LegalSVT = SVT;
+  if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
+    LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+    if (LegalSVT.bitsLT(SVT))
+      return SDValue();
+  }
   SmallVector<SDValue, 4> Outputs;
   for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
     SDValue V1 = BV1->getOperand(I);
     SDValue V2 = BV2->getOperand(I);
 
-    // Avoid BUILD_VECTOR nodes that perform implicit truncation.
-    // FIXME: This is valid and could be handled by truncation.
+    if (SVT.isInteger()) {
+        if (V1->getValueType(0).bitsGT(SVT))
+          V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+        if (V2->getValueType(0).bitsGT(SVT))
+          V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+    }
+
     if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
       return SDValue();
 
     // Fold one vector element.
     SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
+    if (LegalSVT != SVT)
+      ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
 
     // Scalar folding only succeeded if the result is a constant or UNDEF.
     if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
@@ -3936,6 +4136,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   return getBuildVector(VT, SDLoc(), Outputs);
 }
 
+// TODO: Merge with FoldConstantArithmetic
 SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
                                                    const SDLoc &DL, EVT VT,
                                                    ArrayRef<SDValue> Ops,
@@ -4027,7 +4228,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
     ScalarResults.push_back(ScalarResult);
   }
 
-  return getBuildVector(VT, DL, ScalarResults);
+  SDValue V = getBuildVector(VT, DL, ScalarResults);
+  NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4297,6 +4500,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
         return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
       }
     }
+
+    // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
+    // when vector types are scalarized and v1iX is legal.
+    // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)
+    if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        N1.getValueType().getVectorNumElements() == 1) {
+      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
+                     N1.getOperand(1));
+    }
     break;
   case ISD::EXTRACT_ELEMENT:
     assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
@@ -4518,7 +4730,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   }
 
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4553,8 +4767,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return V;
     // Vector constant folding.
     SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+    if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+      NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
       return V;
+    }
     break;
   }
   case ISD::SELECT:
@@ -4626,7 +4842,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   }
 
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V = SDValue(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4882,8 +5100,8 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
   // On Darwin, -Os means optimize for size without hurting performance, so
   // only really optimize for size when -Oz (MinSize) is used.
   if (MF.getTarget().getTargetTriple().isOSDarwin())
-    return MF.getFunction()->optForMinSize();
-  return MF.getFunction()->optForSize();
+    return MF.getFunction().optForMinSize();
+  return MF.getFunction().optForSize();
 }
 
 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
@@ -5558,21 +5776,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
 
 SDValue SelectionDAG::getMemIntrinsicNode(
     unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
-    EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
-    bool ReadMem, bool WriteMem, unsigned Size) {
+    EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
+    MachineMemOperand::Flags Flags, unsigned Size) {
   if (Align == 0)  // Ensure that codegen never sees alignment 0
     Align = getEVTAlignment(MemVT);
 
-  MachineFunction &MF = getMachineFunction();
-  auto Flags = MachineMemOperand::MONone;
-  if (WriteMem)
-    Flags |= MachineMemOperand::MOStore;
-  if (ReadMem)
-    Flags |= MachineMemOperand::MOLoad;
-  if (Vol)
-    Flags |= MachineMemOperand::MOVolatile;
   if (!Size)
     Size = MemVT.getStoreSize();
+
+  MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
 
@@ -5597,6 +5809,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
   if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops);
+    ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
+        Opcode, dl.getIROrder(), VTList, MemVT, MMO));
     ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
     void *IP = nullptr;
     if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5622,7 +5836,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
 /// MachinePointerInfo record from it.  This is particularly useful because the
 /// code generator has many cases where it doesn't bother passing in a
 /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+                                           SelectionDAG &DAG, SDValue Ptr,
                                            int64_t Offset = 0) {
   // If this is FI+Offset, we can model it.
   if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
@@ -5633,7 +5848,7 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
   if (Ptr.getOpcode() != ISD::ADD ||
       !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
       !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
-    return MachinePointerInfo();
+    return Info;
 
   int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
   return MachinePointerInfo::getFixedStack(
@@ -5645,14 +5860,15 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
 /// MachinePointerInfo record from it.  This is particularly useful because the
 /// code generator has many cases where it doesn't bother passing in a
 /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+                                           SelectionDAG &DAG, SDValue Ptr,
                                            SDValue OffsetOp) {
   // If the 'Offset' value isn't a constant, we can't handle this.
   if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
-    return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
+    return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue());
   if (OffsetOp.isUndef())
-    return InferPointerInfo(DAG, Ptr);
-  return MachinePointerInfo();
+    return InferPointerInfo(Info, DAG, Ptr);
+  return Info;
 }
 
 SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
@@ -5672,7 +5888,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
   // If we don't have a PtrInfo, infer the trivial frame index case to simplify
   // clients.
   if (PtrInfo.V.isNull())
-    PtrInfo = InferPointerInfo(*this, Ptr, Offset);
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5791,7 +6007,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
   assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
 
   if (PtrInfo.V.isNull())
-    PtrInfo = InferPointerInfo(*this, Ptr);
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5841,7 +6057,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
   assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
 
   if (PtrInfo.V.isNull())
-    PtrInfo = InferPointerInfo(*this, Ptr);
+    PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -6118,7 +6334,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   }
 
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
@@ -6171,7 +6389,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
     createOperands(N, Ops);
   }
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
@@ -6580,14 +6800,16 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   unsigned OrigOpc = Node->getOpcode();
   unsigned NewOpc;
   bool IsUnary = false;
+  bool IsTernary = false;
   switch (OrigOpc) {
-  default: 
+  default:
     llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
   case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
   case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
   case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
   case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
   case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
   case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
   case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
   case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@@ -6614,10 +6836,14 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   SDNode *Res = nullptr;
   if (IsUnary)
     Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+  else if (IsTernary)
+    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+                                           Node->getOperand(2),
+                                           Node->getOperand(3)});
   else
     Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
                                            Node->getOperand(2) });
-  
+
   // MorphNodeTo can operate in two ways: if an existing node with the
   // specified operands exists, it can just return it.  Otherwise, it
   // updates the node in place to have the requested operands.
@@ -6630,7 +6856,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
     RemoveDeadNode(Node);
   }
 
-  return Res; 
+  return Res;
 }
 
 /// getMachineNode - These are used for target selectors to create a new node
@@ -6794,32 +7020,125 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
 /// getDbgValue - Creates a SDDbgValue node.
 ///
 /// SDNode
-SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
-                                      unsigned R, bool IsIndirect, uint64_t Off,
+SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr,
+                                      SDNode *N, unsigned R, bool IsIndirect,
                                       const DebugLoc &DL, unsigned O) {
   assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
   return new (DbgInfo->getAlloc())
-      SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
+      SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O);
 }
 
 /// Constant
-SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
-                                              const Value *C, uint64_t Off,
+SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
+                                              DIExpression *Expr,
+                                              const Value *C,
                                               const DebugLoc &DL, unsigned O) {
   assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
+  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O);
 }
 
 /// FrameIndex
-SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
-                                                unsigned FI, uint64_t Off,
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+                                                DIExpression *Expr, unsigned FI,
                                                 const DebugLoc &DL,
                                                 unsigned O) {
   assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
+  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+}
+
+void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
+                                     unsigned OffsetInBits, unsigned SizeInBits,
+                                     bool InvalidateDbg) {
+  SDNode *FromNode = From.getNode();
+  SDNode *ToNode = To.getNode();
+  assert(FromNode && ToNode && "Can't modify dbg values");
+
+  // PR35338
+  // TODO: assert(From != To && "Redundant dbg value transfer");
+  // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer");
+  if (From == To || FromNode == ToNode)
+    return;
+
+  if (!FromNode->getHasDebugValue())
+    return;
+
+  SmallVector<SDDbgValue *, 2> ClonedDVs;
+  for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
+    if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+      continue;
+
+    // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");
+
+    // Just transfer the dbg value attached to From.
+    if (Dbg->getResNo() != From.getResNo())
+      continue;
+
+    DIVariable *Var = Dbg->getVariable();
+    auto *Expr = Dbg->getExpression();
+    // If a fragment is requested, update the expression.
+    if (SizeInBits) {
+      // When splitting a larger (e.g., sign-extended) value whose
+      // lower bits are described with an SDDbgValue, do not attempt
+      // to transfer the SDDbgValue to the upper bits.
+      if (auto FI = Expr->getFragmentInfo())
+        if (OffsetInBits + SizeInBits > FI->SizeInBits)
+          continue;
+      auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits,
+                                                             SizeInBits);
+      if (!Fragment)
+        continue;
+      Expr = *Fragment;
+    }
+    // Clone the SDDbgValue and move it to To.
+    SDDbgValue *Clone =
+        getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(),
+                    Dbg->getDebugLoc(), Dbg->getOrder());
+    ClonedDVs.push_back(Clone);
+
+    if (InvalidateDbg)
+      Dbg->setIsInvalidated();
+  }
+
+  for (SDDbgValue *Dbg : ClonedDVs)
+    AddDbgValue(Dbg, ToNode, false);
+}
+
+void SelectionDAG::salvageDebugInfo(SDNode &N) {
+  if (!N.getHasDebugValue())
+    return;
+  for (auto DV : GetDbgValues(&N)) {
+    if (DV->isInvalidated())
+      continue;
+    switch (N.getOpcode()) {
+    default:
+      break;
+    case ISD::ADD:
+      SDValue N0 = N.getOperand(0);
+      SDValue N1 = N.getOperand(1);
+      if (!isConstantIntBuildVectorOrConstantInt(N0) &&
+          isConstantIntBuildVectorOrConstantInt(N1)) {
+        uint64_t Offset = N.getConstantOperandVal(1);
+        // Rewrite an ADD constant node into a DIExpression. Since we are
+        // performing arithmetic to compute the variable's *value* in the
+        // DIExpression, we need to mark the expression with a
+        // DW_OP_stack_value.
+        auto *DIExpr = DV->getExpression();
+        DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
+                                       DIExpression::NoDeref,
+                                       DIExpression::WithStackValue);
+        SDDbgValue *Clone =
+            getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
+                        DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+        DV->setIsInvalidated();
+        AddDbgValue(Clone, N0.getNode(), false);
+        DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+              dbgs() << " into " << *DIExpr << '\n');
+      }
+    }
+  }
 }
 
 namespace {
@@ -6859,7 +7178,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
   assert(From != To.getNode() && "Cannot replace uses of with self");
 
   // Preserve Debug Values
-  TransferDbgValues(FromN, To);
+  transferDbgValues(FromN, To);
 
   // Iterate over all the existing uses of From. New uses will be added
   // to the beginning of the use list, which we avoid visiting.
@@ -6918,7 +7237,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
   for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
     if (From->hasAnyUseOfValue(i)) {
       assert((i < To->getNumValues()) && "Invalid To location");
-      TransferDbgValues(SDValue(From, i), SDValue(To, i));
+      transferDbgValues(SDValue(From, i), SDValue(To, i));
     }
 
   // Iterate over just the existing users of From. See the comments in
@@ -6962,7 +7281,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
 
   // Preserve Debug Info.
   for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
-    TransferDbgValues(SDValue(From, i), *To);
+    transferDbgValues(SDValue(From, i), *To);
 
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
@@ -7009,7 +7328,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
   }
 
   // Preserve Debug Info.
-  TransferDbgValues(From, To);
+  transferDbgValues(From, To);
 
   // Iterate over just the existing users of From. See the comments in
   // the ReplaceAllUsesWith above.
@@ -7087,7 +7406,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
   if (Num == 1)
     return ReplaceAllUsesOfValueWith(*From, *To);
 
-  TransferDbgValues(*From, *To);
+  transferDbgValues(*From, *To);
 
   // Read up all the uses and make records of them. This helps
   // processing new uses that are introduced during the
@@ -7236,35 +7555,6 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
   DbgInfo->add(DB, SD, isParameter);
 }
 
-/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes.
-void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
-  if (From == To || !From.getNode()->getHasDebugValue())
-    return;
-  SDNode *FromNode = From.getNode();
-  SDNode *ToNode = To.getNode();
-  ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
-  SmallVector<SDDbgValue *, 2> ClonedDVs;
-  for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
-       I != E; ++I) {
-    SDDbgValue *Dbg = *I;
-    // Only add Dbgvalues attached to same ResNo.
-    if (Dbg->getKind() == SDDbgValue::SDNODE &&
-        Dbg->getSDNode() == From.getNode() &&
-        Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
-      assert(FromNode != ToNode &&
-             "Should not transfer Debug Values intranode");
-      SDDbgValue *Clone =
-          getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
-                      To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
-                      Dbg->getDebugLoc(), Dbg->getOrder());
-      ClonedDVs.push_back(Clone);
-      Dbg->setIsInvalidated();
-    }
-  }
-  for (SDDbgValue *I : ClonedDVs)
-    AddDbgValue(I, ToNode, false);
-}
-
 SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
                                                    SDValue NewMemOp) {
   assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 0d69441ebb7f..544da362be69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,5 +1,4 @@
-//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address
-//Analysis ---*- C++ -*-===//
+//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,15 +6,18 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
 
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
 
-namespace llvm {
+using namespace llvm;
 
 bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
                                      const SelectionDAG &DAG, int64_t &Off) {
@@ -55,7 +57,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
 /// Parses tree in Ptr for base, index, offset addresses.
 BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
   // (((B + I*M) + c)) + c ...
-  SDValue Base = Ptr;
+  SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
   SDValue Index = SDValue();
   int64_t Offset = 0;
   bool IsIndexSignExt = false;
@@ -112,4 +114,3 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
   }
   return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
 }
-} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 127312076207..71cb8cb78f6d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,63 +12,113 @@
 //===----------------------------------------------------------------------===//
 
 #include "SelectionDAGBuilder.h"
-#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <tuple>
 #include <utility>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "isel"
@@ -78,11 +128,18 @@ using namespace llvm;
 static unsigned LimitFloatPrecision;
 
 static cl::opt<unsigned, true>
-LimitFPPrecision("limit-float-precision",
-                 cl::desc("Generate low-precision inline sequences "
-                          "for some float libcalls"),
-                 cl::location(LimitFloatPrecision),
-                 cl::init(0));
+    LimitFPPrecision("limit-float-precision",
+                     cl::desc("Generate low-precision inline sequences "
+                              "for some float libcalls"),
+                     cl::location(LimitFloatPrecision), cl::Hidden,
+                     cl::init(0));
+
+static cl::opt<unsigned> SwitchPeelThreshold(
+    "switch-peel-threshold", cl::Hidden, cl::init(66),
+    cl::desc("Set the case probability threshold for peeling the case from a "
+             "switch statement. A value greater than 100 will void this "
+             "optimization"));
+
 // Limit the width of DAG chains. This is important in general to prevent
 // DAG-based analysis from blowing up. For example, alias analysis and
 // load clustering may not complete in reasonable time. It is difficult to
@@ -101,7 +158,7 @@ static const unsigned MaxParallelChains = 64;
 
 // True if the Value passed requires ABI mangling as it is a parameter to a
 // function or a return value from a function which is not an intrinsic.
-static bool isABIRegCopy(const Value * V) {
+static bool isABIRegCopy(const Value *V) {
   const bool IsRetInst = V && isa<ReturnInst>(V);
   const bool IsCallInst = V && isa<CallInst>(V);
   const bool IsInLineAsm =
@@ -554,7 +611,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
                                  MVT PartVT, const Value *V,
                                  bool IsABIRegCopy) {
-
   EVT ValueVT = Val.getValueType();
   assert(ValueVT.isVector() && "Not a vector");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -600,7 +656,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
         Val = DAG.getNode(
             ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
             DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
       } else {
         assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
                "lossy conversion of vector to scalar type");
@@ -677,8 +732,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
   }
 }
 
-RegsForValue::RegsForValue() { IsABIMangled = false; }
-
 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                            EVT valuevt, bool IsABIMangledValue)
     : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
@@ -888,7 +941,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
   SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
   Ops.push_back(Res);
 
-  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+  if (Code == InlineAsm::Kind_Clobber) {
+    // Clobbers should always have a 1:1 mapping with registers, and may
+    // reference registers that have illegal (e.g. vector) types. Hence, we
+    // shouldn't try to apply any sort of splitting logic to them.
+    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
+           "No 1:1 mapping from clobbers to regs?");
+    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+    (void)SP;
+    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
+      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
+      assert(
+          (Regs[I] != SP ||
+           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
+          "If we clobbered the stack pointer, MFI should know about it.");
+    }
+    return;
+  }
+
   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
     unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
     MVT RegisterVT = RegVTs[Value];
@@ -896,11 +966,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
       assert(Reg < Regs.size() && "Mismatch in # registers expected");
       unsigned TheReg = Regs[Reg++];
       Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
-
-      if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
-        // If we clobbered the stack pointer, MFI should know about it.
-        assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
-      }
     }
   }
 }
@@ -1025,12 +1090,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
     DIExpression *Expr = DI->getExpression();
     assert(Variable->isValidLocationForIntrinsic(dl) &&
            "Expected inlined-at fields to agree");
-    uint64_t Offset = DI->getOffset();
     SDDbgValue *SDV;
     if (Val.getNode()) {
-      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
-                                    Val)) {
-        SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder);
+      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+        SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
         DAG.AddDbgValue(SDV, Val.getNode(), false);
       }
     } else
@@ -1409,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
     // Leave Outs empty so that LowerReturn won't try to load return
     // registers the usual way.
     SmallVector<EVT, 1> PtrValueVTs;
-    ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
+    ComputeValueVTs(TLI, DL,
+                    F->getReturnType()->getPointerTo(
+                        DAG.getDataLayout().getAllocaAddrSpace()),
                     PtrValueVTs);
 
     SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
@@ -1421,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
     ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
     unsigned NumValues = ValueVTs.size();
 
-    // An aggregate return value cannot wrap around the address space, so
-    // offsets to its parts don't wrap either.
-    SDNodeFlags Flags;
-    Flags.setNoUnsignedWrap(true);
-
     SmallVector<SDValue, 4> Chains(NumValues);
     for (unsigned i = 0; i != NumValues; ++i) {
-      SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
-                                RetPtr.getValueType(), RetPtr,
-                                DAG.getIntPtrConstant(Offsets[i],
-                                                      getCurSDLoc()),
-                                Flags);
-      Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
-                               SDValue(RetOp.getNode(), RetOp.getResNo() + i),
-                               // FIXME: better loc info would be nice.
-                               Add, MachinePointerInfo());
+      // An aggregate return value cannot wrap around the address space, so
+      // offsets to its parts don't wrap either.
+      SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
+      Chains[i] = DAG.getStore(
+          Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+          // FIXME: better loc info would be nice.
+          Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
     }
 
     Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -1515,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                         EVT(TLI.getPointerTy(DL))));
   }
 
-  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
   CallingConv::ID CallConv =
-    DAG.getMachineFunction().getFunction()->getCallingConv();
+    DAG.getMachineFunction().getFunction().getCallingConv();
   Chain = DAG.getTargetLoweringInfo().LowerReturn(
       Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
 
@@ -1623,7 +1681,6 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
 /// This function emits a branch and is used at the leaves of an OR or an
 /// AND operator tree.
-///
 void
 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                   MachineBasicBlock *TBB,
@@ -1659,7 +1716,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
       }
 
       CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
-                   TBB, FBB, CurBB, TProb, FProb);
+                   TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
       SwitchCases.push_back(CB);
       return;
     }
@@ -1668,7 +1725,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
   // Create a CaseBlock record representing this branch.
   ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
   CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
-               nullptr, TBB, FBB, CurBB, TProb, FProb);
+               nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
   SwitchCases.push_back(CB);
 }
 
@@ -1712,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
 
   // If this node is not part of the or/and tree, emit it as a branch.
   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
-      BOpc != Opc || !BOp->hasOneUse() ||
+      BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
       BOp->getParent() != CurBB->getBasicBlock() ||
       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
       !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
@@ -1867,7 +1924,6 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
   //     je foo
   //     cmp D, E
   //     jle foo
-  //
   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
     Instruction::BinaryOps Opcode = BOp->getOpcode();
     if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
@@ -1907,7 +1963,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
 
   // Create a CaseBlock record representing this branch.
   CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
-               nullptr, Succ0MBB, Succ1MBB, BrMBB);
+               nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
 
   // Use visitSwitchCase to actually insert the fast branch sequence for this
   // cond branch.
@@ -1920,7 +1976,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
                                           MachineBasicBlock *SwitchBB) {
   SDValue Cond;
   SDValue CondLHS = getValue(CB.CmpLHS);
-  SDLoc dl = getCurSDLoc();
+  SDLoc dl = CB.DL;
 
   // Build the setcc now.
   if (!CB.CmpMHS) {
@@ -2054,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
   MachineFunction &MF = DAG.getMachineFunction();
-  Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
+  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
   MachineSDNode *Node =
       DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
   if (Global) {
@@ -2088,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   SDValue Guard;
   SDLoc dl = getCurSDLoc();
   SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
-  const Module &M = *ParentBB->getParent()->getFunction()->getParent();
+  const Module &M = *ParentBB->getParent()->getFunction().getParent();
   unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
 
   // Generate code to load the content of the guard slot.
-  SDValue StackSlot = DAG.getLoad(
+  SDValue GuardVal = DAG.getLoad(
       PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
       MachineMemOperand::MOVolatile);
 
+  if (TLI.useStackGuardXorFP())
+    GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
+
   // Retrieve guard check function, nullptr if instrumentation is inlined.
   if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
     // The target provides a guard check function to validate the guard value.
@@ -2108,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
 
     TargetLowering::ArgListTy Args;
     TargetLowering::ArgListEntry Entry;
-    Entry.Node = StackSlot;
+    Entry.Node = GuardVal;
     Entry.Ty = FnTy->getParamType(0);
     if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
       Entry.IsInReg = true;
@@ -2141,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
 
   // Perform the comparison via a subtract/getsetcc.
   EVT VT = Guard.getValueType();
-  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
 
   SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
                                                         *DAG.getContext(),
@@ -2151,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   // If the sub is not 0, then we know the guard/stackslot do not equal, so
   // branch to failure MBB.
   SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
-                               MVT::Other, StackSlot.getOperand(0),
+                               MVT::Other, GuardVal.getOperand(0),
                                Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
   // Otherwise branch to success MBB.
   SDValue Br = DAG.getNode(ISD::BR, dl,
@@ -2530,7 +2589,7 @@ static bool isVectorReductionOp(const User *I) {
   case Instruction::FAdd:
   case Instruction::FMul:
     if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
-      if (FPOp->getFastMathFlags().unsafeAlgebra())
+      if (FPOp->getFastMathFlags().isFast())
         break;
     LLVM_FALLTHROUGH;
   default:
@@ -2576,7 +2635,7 @@ static bool isVectorReductionOp(const User *I) {
 
       if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
         if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
-          if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
+          if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
             return false;
         UsersToVisit.push_back(U);
       } else if (const ShuffleVectorInst *ShufInst =
@@ -2670,7 +2729,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   Flags.setNoInfs(FMF.noInfs());
   Flags.setNoNaNs(FMF.noNaNs());
   Flags.setNoSignedZeros(FMF.noSignedZeros());
-  Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+  Flags.setUnsafeAlgebra(FMF.isFast());
 
   SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
                                      Op1, Op2, Flags);
@@ -2779,7 +2838,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
 // Check if the condition of the select has one use or two users that are both
 // selects with the same condition.
 static bool hasOnlySelectUsers(const Value *Cond) {
-  return all_of(Cond->users(), [](const Value *V) {
+  return llvm::all_of(Cond->users(), [](const Value *V) {
     return isa<SelectInst>(V);
   });
 }
@@ -3447,7 +3506,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
 
   SDValue AllocSize = getValue(I.getArraySize());
 
-  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
+  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
   if (AllocSize.getValueType() != IntPtr)
     AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
 
@@ -3468,17 +3527,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
   // an address inside an alloca.
   SDNodeFlags Flags;
   Flags.setNoUnsignedWrap(true);
-  AllocSize = DAG.getNode(ISD::ADD, dl,
-                          AllocSize.getValueType(), AllocSize,
-                          DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
+  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+                          DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
 
   // Mask out the low bits for alignment purposes.
-  AllocSize = DAG.getNode(ISD::AND, dl,
-                          AllocSize.getValueType(), AllocSize,
-                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
-                                                dl));
+  AllocSize =
+      DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+                  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
 
-  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
+  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
   SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
   SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
   setValue(&I, DSA);
@@ -3807,18 +3864,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
 //
 // When the first GEP operand is a single pointer - it is the uniform base we
 // are looking for. If first operand of the GEP is a splat vector - we
-// extract the spalt value and use it as a uniform base.
+// extract the splat value and use it as a uniform base.
 // In all other cases the function returns 'false'.
-//
 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
                            SelectionDAGBuilder* SDB) {
-
   SelectionDAG& DAG = SDB->DAG;
   LLVMContext &Context = *DAG.getContext();
 
   assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
   const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!GEP || GEP->getNumOperands() > 2)
+  if (!GEP)
     return false;
 
   const Value *GEPPtr = GEP->getPointerOperand();
@@ -3827,7 +3882,15 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
   else if (!(Ptr = getSplatValue(GEPPtr)))
     return false;
 
-  Value *IndexVal = GEP->getOperand(1);
+  unsigned FinalIndex = GEP->getNumOperands() - 1;
+  Value *IndexVal = GEP->getOperand(FinalIndex);
+
+  // Ensure all the other indices are 0.
+  for (unsigned i = 1; i < FinalIndex; ++i) {
+    auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (!C || !C->isZero())
+      return false;
+  }
 
   // The operands of the GEP may be defined in another basic block.
   // In this case we'll not find nodes for the operands.
@@ -3837,13 +3900,6 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
   Base = SDB->getValue(Ptr);
   Index = SDB->getValue(IndexVal);
 
-  // Suppress sign extension.
-  if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
-    if (SDB->findValue(Sext->getOperand(0))) {
-      IndexVal = Sext->getOperand(0);
-      Index = SDB->getValue(IndexVal);
-    }
-  }
   if (!Index.getValueType().isVector()) {
     unsigned GEPWidth = GEP->getType()->getVectorNumElements();
     EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
@@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
+  if (!TLI.supportsUnalignedAtomics() &&
+      I.getAlignment() < VT.getStoreSize())
     report_fatal_error("Cannot generate unaligned atomic load");
 
   MachineMemOperand *MMO =
@@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   EVT VT =
       TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
+  if (I.getAlignment() < VT.getStoreSize())
     report_fatal_error("Cannot generate unaligned atomic store");
 
   SDValue OutChain =
@@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // Info is set by getTgtMemInstrinsic
   TargetLowering::IntrinsicInfo Info;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+                                               DAG.getMachineFunction(),
+                                               Intrinsic);
 
   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
   if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
@@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   SDValue Result;
   if (IsTgtIntrinsic) {
     // This is target intrinsic that touches memory
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
-                                     VTs, Ops, Info.memVT,
-                                   MachinePointerInfo(Info.ptrVal, Info.offset),
-                                     Info.align, Info.vol,
-                                     Info.readMem, Info.writeMem, Info.size);
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
+      Ops, Info.memVT,
+      MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
+      Info.flags, Info.size);
   } else if (!HasChain) {
     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
   } else if (!I.getType()->isVoidTy()) {
@@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
-
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
-
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                            const TargetLowering &TLI) {
-
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
   return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
 }
 
-
 /// ExpandPowI - Expand a llvm.powi intrinsic.
 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
                           SelectionDAG &DAG) {
@@ -4712,8 +4766,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
     if (Val == 0)
       return DAG.getConstantFP(1.0, DL, LHS.getValueType());
 
-    const Function *F = DAG.getMachineFunction().getFunction();
-    if (!F->optForSize() ||
+    const Function &F = DAG.getMachineFunction().getFunction();
+    if (!F.optForSize() ||
         // If optimizing for size, don't insert too many multiplies.
         // This inserts up to 5 multiplies.
         countPopulation(Val) + Log2_32(Val) < 7) {
@@ -4766,12 +4820,12 @@ static unsigned getUnderlyingArgReg(const SDValue &N) {
   }
 }
 
-/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
-/// argument, create the corresponding DBG_VALUE machine instruction for it now.
-/// At the end of instruction selection, they will be inserted to the entry BB.
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now.  At the end of
+/// instruction selection, they will be inserted to the entry BB.
 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     const Value *V, DILocalVariable *Variable, DIExpression *Expr,
-    DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) {
+    DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
   const Argument *Arg = dyn_cast<Argument>(V);
   if (!Arg)
     return false;
@@ -4779,17 +4833,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
 
-  // Ignore inlined function arguments here.
-  //
-  // FIXME: Should we be checking DL->inlinedAt() to determine this?
-  if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
-    return false;
-
   bool IsIndirect = false;
   Optional<MachineOperand> Op;
   // Some arguments' frame index is recorded during argument lowering.
   int FI = FuncInfo.getArgumentFrameIndex(Arg);
-  if (FI != INT_MAX)
+  if (FI != std::numeric_limits<int>::max())
     Op = MachineOperand::CreateFI(FI);
 
   if (!Op && N.getNode()) {
@@ -4806,22 +4854,48 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     }
   }
 
+  if (!Op && N.getNode())
+    // Check if frame index is available.
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+      if (FrameIndexSDNode *FINode =
+          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+        Op = MachineOperand::CreateFI(FINode->getIndex());
+
   if (!Op) {
     // Check if ValueMap has reg number.
     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
     if (VMI != FuncInfo.ValueMap.end()) {
+      const auto &TLI = DAG.getTargetLoweringInfo();
+      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
+                       V->getType(), isABIRegCopy(V));
+      unsigned NumRegs =
+          std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
+      if (NumRegs > 1) {
+        unsigned I = 0;
+        unsigned Offset = 0;
+        auto RegisterVT = RFV.RegVTs.begin();
+        for (auto RegCount : RFV.RegCount) {
+          unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
+          for (unsigned E = I + RegCount; I != E; ++I) {
+            // The vregs are guaranteed to be allocated in sequence.
+            Op = MachineOperand::CreateReg(VMI->second + I, false);
+            auto FragmentExpr = DIExpression::createFragmentExpression(
+                Expr, Offset, RegisterSize);
+            if (!FragmentExpr)
+              continue;
+            FuncInfo.ArgDbgValues.push_back(
+                BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
+                        Op->getReg(), Variable, *FragmentExpr));
+            Offset += RegisterSize;
+          }
+        }
+        return true;
+      }
       Op = MachineOperand::CreateReg(VMI->second, false);
       IsIndirect = IsDbgDeclare;
     }
   }
 
-  if (!Op && N.getNode())
-    // Check if frame index is available.
-    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
-      if (FrameIndexSDNode *FINode =
-          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
-        Op = MachineOperand::CreateFI(FINode->getIndex());
-
   if (!Op)
     return false;
 
@@ -4830,12 +4904,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   if (Op->isReg())
     FuncInfo.ArgDbgValues.push_back(
         BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
-                Op->getReg(), Offset, Variable, Expr));
+                Op->getReg(), Variable, Expr));
   else
     FuncInfo.ArgDbgValues.push_back(
         BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
             .add(*Op)
-            .addImm(Offset)
+            .addImm(0)
             .addMetadata(Variable)
             .addMetadata(Expr));
 
@@ -4845,18 +4919,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
 /// Return the appropriate SDDbgValue based on N.
 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
                                              DILocalVariable *Variable,
-                                             DIExpression *Expr, int64_t Offset,
+                                             DIExpression *Expr,
                                              const DebugLoc &dl,
                                              unsigned DbgSDNodeOrder) {
   if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
     // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
     // stack slot locations as such instead of as indirectly addressed
     // locations.
-    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl,
+    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
                                      DbgSDNodeOrder);
   }
-  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
-                         Offset, dl, DbgSDNodeOrder);
+  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
+                         DbgSDNodeOrder);
 }
 
 // VisualStudio defines setjmp as _setjmp
@@ -4971,8 +5045,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::memcpy_element_unordered_atomic: {
-    const ElementUnorderedAtomicMemCpyInst &MI =
-        cast<ElementUnorderedAtomicMemCpyInst>(I);
+    const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
     SDValue Dst = getValue(MI.getRawDest());
     SDValue Src = getValue(MI.getRawSource());
     SDValue Length = getValue(MI.getLength());
@@ -5010,7 +5083,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::memmove_element_unordered_atomic: {
-    auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
+    auto &MI = cast<AtomicMemMoveInst>(I);
     SDValue Dst = getValue(MI.getRawDest());
     SDValue Src = getValue(MI.getRawSource());
     SDValue Length = getValue(MI.getLength());
@@ -5048,7 +5121,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::memset_element_unordered_atomic: {
-    auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+    auto &MI = cast<AtomicMemSetInst>(I);
     SDValue Dst = getValue(MI.getRawDest());
     SDValue Val = getValue(MI.getValue());
     SDValue Length = getValue(MI.getLength());
@@ -5086,30 +5159,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(CallResult.second);
     return nullptr;
   }
+  case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare: {
-    const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
     DILocalVariable *Variable = DI.getVariable();
     DIExpression *Expression = DI.getExpression();
-    const Value *Address = DI.getAddress();
     assert(Variable && "Missing variable");
-    if (!Address) {
-      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-      return nullptr;
-    }
 
     // Check if address has undef value.
-    if (isa<UndefValue>(Address) ||
+    const Value *Address = DI.getVariableLocation();
+    if (!Address || isa<UndefValue>(Address) ||
         (Address->use_empty() && !isa<Argument>(Address))) {
       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       return nullptr;
     }
 
-    // Byval arguments with frame indices were already handled after argument
-    // lowering and before isel.
-    const auto *Arg =
-        dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
-    if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+    bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+
+    // Check if this variable can be described by a frame index, typically
+    // either as a static alloca or a byval parameter.
+    int FI = std::numeric_limits<int>::max();
+    if (const auto *AI =
+            dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
+      if (AI->isStaticAlloca()) {
+        auto I = FuncInfo.StaticAllocaMap.find(AI);
+        if (I != FuncInfo.StaticAllocaMap.end())
+          FI = I->second;
+      }
+    } else if (const auto *Arg = dyn_cast<Argument>(
+                   Address->stripInBoundsConstantOffsets())) {
+      FI = FuncInfo.getArgumentFrameIndex(Arg);
+    }
+
+    // llvm.dbg.addr is control dependent and always generates indirect
+    // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
+    // the MachineFunction variable table.
+    if (FI != std::numeric_limits<int>::max()) {
+      if (Intrinsic == Intrinsic::dbg_addr)
+        DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
+                                                  SDNodeOrder),
+                        getRoot().getNode(), isParameter);
       return nullptr;
+    }
 
     SDValue &N = NodeMap[Address];
     if (!N.getNode() && isa<Argument>(Address))
@@ -5120,26 +5211,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
         Address = BCI->getOperand(0);
       // Parameters are handled specially.
-      bool isParameter = Variable->isParameter() || isa<Argument>(Address);
       auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
       if (isParameter && FINode) {
         // Byval parameter. We have a frame index at this point.
         SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
-                                        FINode->getIndex(), 0, dl, SDNodeOrder);
+                                        FINode->getIndex(), dl, SDNodeOrder);
       } else if (isa<Argument>(Address)) {
         // Address is an argument, so try to emit its dbg value using
         // virtual register info from the FuncInfo.ValueMap.
-        EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N);
+        EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
         return nullptr;
       } else {
         SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
-                              true, 0, dl, SDNodeOrder);
+                              true, dl, SDNodeOrder);
       }
       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
     } else {
       // If Address is an argument then try to emit its dbg value using
       // virtual register info from the FuncInfo.ValueMap.
-      if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
+      if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
                                     N)) {
         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       }
@@ -5152,15 +5242,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
     DILocalVariable *Variable = DI.getVariable();
     DIExpression *Expression = DI.getExpression();
-    uint64_t Offset = DI.getOffset();
     const Value *V = DI.getValue();
     if (!V)
       return nullptr;
 
     SDDbgValue *SDV;
     if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
-      SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
-                                    SDNodeOrder);
+      SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, nullptr, false);
       return nullptr;
     }
@@ -5171,10 +5259,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
       N = UnusedArgNodeMap[V];
     if (N.getNode()) {
-      if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false,
-                                   N))
+      if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
         return nullptr;
-      SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
+      SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, N.getNode(), false);
       return nullptr;
     }
@@ -5213,12 +5300,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::eh_unwind_init:
     DAG.getMachineFunction().setCallsUnwindInit(true);
     return nullptr;
-  case Intrinsic::eh_dwarf_cfa: {
+  case Intrinsic::eh_dwarf_cfa:
     setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
     return nullptr;
-  }
   case Intrinsic::eh_sjlj_callsite: {
     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5247,17 +5333,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Op.getValue(1));
     return nullptr;
   }
-  case Intrinsic::eh_sjlj_longjmp: {
+  case Intrinsic::eh_sjlj_longjmp:
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
                             getRoot(), getValue(I.getArgOperand(0))));
     return nullptr;
-  }
-  case Intrinsic::eh_sjlj_setup_dispatch: {
+  case Intrinsic::eh_sjlj_setup_dispatch:
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
                             getRoot()));
     return nullptr;
-  }
-
   case Intrinsic::masked_gather:
     visitMaskedGather(I);
     return nullptr;
@@ -5430,6 +5513,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_sqrt:
   case Intrinsic::experimental_constrained_pow:
   case Intrinsic::experimental_constrained_powi:
@@ -5534,11 +5618,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Res.getValue(1));
     return nullptr;
   }
-  case Intrinsic::stackrestore: {
+  case Intrinsic::stackrestore:
     Res = getValue(I.getArgOperand(0));
     DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
     return nullptr;
-  }
   case Intrinsic::get_dynamic_area_offset: {
     SDValue Op = getRoot();
     EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5557,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::stackguard: {
     EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
     MachineFunction &MF = DAG.getMachineFunction();
-    const Module &M = *MF.getFunction()->getParent();
+    const Module &M = *MF.getFunction().getParent();
     SDValue Chain = getRoot();
     if (TLI.useLoadStackGuardNode()) {
       Res = getLoadStackGuard(DAG, sdl, Chain);
@@ -5568,6 +5651,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                         MachinePointerInfo(Global, 0), Align,
                         MachineMemOperand::MOVolatile);
     }
+    if (TLI.useStackGuardXorFP())
+      Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
     DAG.setRoot(Chain);
     setValue(&I, Res);
     return nullptr;
@@ -5624,9 +5709,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   case Intrinsic::assume:
   case Intrinsic::var_annotation:
-    // Discard annotate attributes and assumptions
+  case Intrinsic::sideeffect:
+    // Discard annotate attributes, assumptions, and artificial side-effects.
     return nullptr;
 
+  case Intrinsic::codeview_annotation: {
+    // Emit a label associated with this metadata.
+    MachineFunction &MF = DAG.getMachineFunction();
+    MCSymbol *Label =
+        MF.getMMI().getContext().createTempSymbol("annotation", true);
+    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+    MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
+    Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
+    DAG.setRoot(Res);
+    return nullptr;
+  }
+
   case Intrinsic::init_trampoline: {
     const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
 
@@ -5643,17 +5741,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Res);
     return nullptr;
   }
-  case Intrinsic::adjust_trampoline: {
+  case Intrinsic::adjust_trampoline:
     setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
     return nullptr;
-  }
   case Intrinsic::gcroot: {
-    MachineFunction &MF = DAG.getMachineFunction();
-    const Function *F = MF.getFunction();
-    (void)F;
-    assert(F->hasGC() &&
+    assert(DAG.getMachineFunction().getFunction().hasGC() &&
            "only valid in functions with gc specified, enforced by Verifier");
     assert(GFI && "implied by previous");
     const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
@@ -5670,11 +5764,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
     return nullptr;
 
-  case Intrinsic::expect: {
+  case Intrinsic::expect:
     // Just replace __builtin_expect(exp, c) with EXP.
     setValue(&I, getValue(I.getArgOperand(0)));
     return nullptr;
-  }
 
   case Intrinsic::debugtrap:
   case Intrinsic::trap: {
@@ -5728,6 +5821,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::prefetch: {
     SDValue Ops[5];
     unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags = rw == 0 ? MachineMemOperand::MOLoad :MachineMemOperand::MOStore;
     Ops[0] = getRoot();
     Ops[1] = getValue(I.getArgOperand(0));
     Ops[2] = getValue(I.getArgOperand(1));
@@ -5738,9 +5832,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                         EVT::getIntegerVT(*Context, 8),
                                         MachinePointerInfo(I.getArgOperand(0)),
                                         0, /* align */
-                                        false, /* volatile */
-                                        rw==0, /* read */
-                                        rw==1)); /* write */
+                                        Flags));
     return nullptr;
   }
   case Intrinsic::lifetime_start:
@@ -5792,27 +5884,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::donothing:
     // ignore
     return nullptr;
-  case Intrinsic::experimental_stackmap: {
+  case Intrinsic::experimental_stackmap:
     visitStackmap(I);
     return nullptr;
-  }
   case Intrinsic::experimental_patchpoint_void:
-  case Intrinsic::experimental_patchpoint_i64: {
+  case Intrinsic::experimental_patchpoint_i64:
     visitPatchpoint(&I);
     return nullptr;
-  }
-  case Intrinsic::experimental_gc_statepoint: {
+  case Intrinsic::experimental_gc_statepoint:
     LowerStatepoint(ImmutableStatepoint(&I));
     return nullptr;
-  }
-  case Intrinsic::experimental_gc_result: {
+  case Intrinsic::experimental_gc_result:
     visitGCResult(cast<GCResultInst>(I));
     return nullptr;
-  }
-  case Intrinsic::experimental_gc_relocate: {
+  case Intrinsic::experimental_gc_relocate:
     visitGCRelocate(cast<GCRelocateInst>(I));
     return nullptr;
-  }
   case Intrinsic::instrprof_increment:
     llvm_unreachable("instrprof failed to lower an increment");
   case Intrinsic::instrprof_value_profile:
@@ -5851,7 +5938,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // Get the symbol that defines the frame offset.
     auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
     auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
-    unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
+    unsigned IdxVal =
+        unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
     MCSymbol *FrameAllocSym =
         MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
             GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
@@ -5932,12 +6020,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_vector_reduce_umax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_fmax:
-  case Intrinsic::experimental_vector_reduce_fmin: {
+  case Intrinsic::experimental_vector_reduce_fmin:
     visitVectorReduce(I, Intrinsic);
     return nullptr;
   }
-
-  }
 }
 
 void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -5961,6 +6047,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   case Intrinsic::experimental_constrained_frem:
     Opcode = ISD::STRICT_FREM;
     break;
+  case Intrinsic::experimental_constrained_fma:
+    Opcode = ISD::STRICT_FMA;
+    break;
   case Intrinsic::experimental_constrained_sqrt:
     Opcode = ISD::STRICT_FSQRT;
     break;
@@ -6007,10 +6096,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   SDVTList VTs = DAG.getVTList(ValueVTs);
   SDValue Result;
   if (FPI.isUnaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)) });
+  else if (FPI.isTernaryOp())
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(FPI.getArgOperand(0)),
+                                  getValue(FPI.getArgOperand(1)),
+                                  getValue(FPI.getArgOperand(2)) });
   else
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)),
                            getValue(FPI.getArgOperand(1))  });
 
@@ -6081,7 +6175,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
     if (MF.hasEHFunclets()) {
       assert(CLI.CS);
       WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
-      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
                                 BeginLabel, EndLabel);
     } else {
       MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
@@ -6189,7 +6283,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
 
 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                              SelectionDAGBuilder &Builder) {
-
   // Check to see if this load can be trivially constant folded, e.g. if the
   // input is from a string literal.
   if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
@@ -6553,10 +6646,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
 
     // Check for well-known libc/libm calls.  If the function is internal, it
     // can't be a library call.  Don't do the check if marked as nobuiltin for
-    // some reason.
+    // some reason or the call site requires strict floating point semantics.
     LibFunc Func;
-    if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
-        LibInfo->getLibFunc(*F, Func) &&
+    if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
+        F->hasName() && LibInfo->getLibFunc(*F, Func) &&
         LibInfo->hasOptimizedCodeGen(Func)) {
       switch (Func) {
       default: break;
@@ -6735,7 +6828,7 @@ public:
   RegsForValue AssignedRegs;
 
   explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
-    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
+    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
   }
 
   /// Whether or not this operand accesses memory
@@ -6767,7 +6860,7 @@ public:
     // If this is an indirect operand, the operand is a pointer to the
     // accessed type.
     if (isIndirect) {
-      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
       if (!PtrTy)
         report_fatal_error("Indirect operand for inline asm not a pointer!");
       OpTy = PtrTy->getElementType();
@@ -6799,7 +6892,7 @@ public:
   }
 };
 
-typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
 
 } // end anonymous namespace
 
@@ -6879,7 +6972,6 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
 /// allocation.  This produces generally horrible, but correct, code.
 ///
 ///   OpInfo describes the operand.
-///
 static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
                                  const SDLoc &DL,
                                  SDISelAsmOperandInfo &OpInfo) {
@@ -7013,6 +7105,8 @@ static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
   return true;
 }
 
+namespace {
+
 class ExtraFlags {
   unsigned Flags = 0;
 
@@ -7028,7 +7122,7 @@ public:
     Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
   }
 
-  void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) {
+  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
     // Ideally, we would only check against memory constraints.  However, the
     // meaning of an Other constraint can be target-specific and we can't easily
     // reason about it.  Therefore, be conservative and set MayLoad/MayStore
@@ -7047,8 +7141,9 @@ public:
   unsigned get() const { return Flags; }
 };
 
+} // end anonymous namespace
+
 /// visitInlineAsm - Handle a call to an InlineAsm object.
-///
 void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
 
@@ -7207,13 +7302,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   RegsForValue RetValRegs;
 
   // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
-  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+  std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
 
   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
 
     switch (OpInfo.Type) {
-    case InlineAsm::isOutput: {
+    case InlineAsm::isOutput:
       if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
           OpInfo.ConstraintType != TargetLowering::C_Register) {
         // Memory output, or 'other' output (e.g. 'X' constraint).
@@ -7264,7 +7359,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                     : InlineAsm::Kind_RegDef,
                                 false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       break;
-    }
+
     case InlineAsm::isInput: {
       SDValue InOperandVal = OpInfo.CallOperand;
 
@@ -7397,7 +7492,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                dl, DAG, AsmNodeOperands);
       break;
     }
-    case InlineAsm::isClobber: {
+    case InlineAsm::isClobber:
       // Add the clobbered value to the operand list, so that the register
       // allocator is aware that the physreg got clobbered.
       if (!OpInfo.AssignedRegs.Regs.empty())
@@ -7406,7 +7501,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                  AsmNodeOperands);
       break;
     }
-    }
   }
 
   // Finish up input operands.  Set the input chain and add the flag last.
@@ -7453,7 +7547,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       return;
   }
 
-  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+  std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
 
   // Process indirect outputs, first output all of the flagged copies out of
   // physregs.
@@ -7865,13 +7959,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
 
   switch (Intrinsic) {
   case Intrinsic::experimental_vector_reduce_fadd:
-    if (FMF.unsafeAlgebra())
+    if (FMF.isFast())
       Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
     break;
   case Intrinsic::experimental_vector_reduce_fmul:
-    if (FMF.unsafeAlgebra())
+    if (FMF.isFast())
       Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
@@ -7903,14 +7997,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
   case Intrinsic::experimental_vector_reduce_umin:
     Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
     break;
-  case Intrinsic::experimental_vector_reduce_fmax: {
+  case Intrinsic::experimental_vector_reduce_fmax:
     Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
     break;
-  }
-  case Intrinsic::experimental_vector_reduce_fmin: {
+  case Intrinsic::experimental_vector_reduce_fmin:
     Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
     break;
-  }
   default:
     llvm_unreachable("Unhandled vector reduce intrinsic");
   }
@@ -7955,10 +8047,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       uint64_t Offset = OldOffsets[i];
       MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
       unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
-      unsigned RegisterVTSize = RegisterVT.getSizeInBits();
+      unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
       RetTys.append(NumRegs, RegisterVT);
       for (unsigned j = 0; j != NumRegs; ++j)
-        Offsets.push_back(Offset + j * RegisterVTSize);
+        Offsets.push_back(Offset + j * RegisterVTByteSZ);
     }
   }
 
@@ -7996,6 +8088,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     Entry.IsSwiftError = false;
     Entry.Alignment = Align;
     CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+    CLI.NumFixedArgs += 1;
     CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
 
     // sret demotion isn't compatible with tail-calls, since the sret argument
@@ -8148,8 +8241,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       }
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
-                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
-                     true);
+                     CLI.CS.getInstruction(), ExtendKind, true);
 
       for (unsigned j = 0; j != NumParts; ++j) {
         // if it isn't first piece, alignment must be 1
@@ -8209,7 +8301,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     // The instruction result is the result of loading from the
     // hidden sret parameter.
     SmallVector<EVT, 1> PVTs;
-    Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+    Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
 
     ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
     assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -8326,9 +8418,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
   return true;
 }
 
-typedef DenseMap<const Argument *,
-                 std::pair<const AllocaInst *, const StoreInst *>>
-    ArgCopyElisionMapTy;
+using ArgCopyElisionMapTy =
+    DenseMap<const Argument *,
+             std::pair<const AllocaInst *, const StoreInst *>>;
 
 /// Scan the entry block of the function in FuncInfo for arguments that look
 /// like copies into a local alloca. Record any copied arguments in
@@ -8503,7 +8595,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     // Put in an sret pointer parameter before all the other parameters.
     SmallVector<EVT, 1> ValueVTs;
     ComputeValueVTs(*TLI, DAG.getDataLayout(),
-                    PointerType::getUnqual(F.getReturnType()), ValueVTs);
+                    F.getReturnType()->getPointerTo(
+                        DAG.getDataLayout().getAllocaAddrSpace()),
+                    ValueVTs);
 
     // NOTE: Assuming that a pointer will never break down to more than one VT
     // or one register.
@@ -8657,7 +8751,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     // from the sret argument into it.
     SmallVector<EVT, 1> ValueVTs;
     ComputeValueVTs(*TLI, DAG.getDataLayout(),
-                    PointerType::getUnqual(F.getReturnType()), ValueVTs);
+                    F.getReturnType()->getPointerTo(
+                        DAG.getDataLayout().getAllocaAddrSpace()),
+                    ValueVTs);
     MVT VT = ValueVTs[0].getSimpleVT();
     MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
     Optional<ISD::NodeType> AssertOp = None;
@@ -8749,11 +8845,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 
     SDB->setValue(&Arg, Res);
     if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      // We want to associate the argument with the frame index, among
+      // involved operands, that correspond to the lowest address. The
+      // getCopyFromParts function, called earlier, is swapping the order of
+      // the operands to BUILD_PAIR depending on endianness. The result of
+      // that swapping is that the least significant bits of the argument will
+      // be in the first operand of the BUILD_PAIR node, and the most
+      // significant bits will be in the second operand.
+      unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
       if (LoadSDNode *LNode =
-          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+          dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
         if (FrameIndexSDNode *FI =
             dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
-        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
     }
 
     // Update the SwiftErrorVRegDefMap.
@@ -8813,7 +8917,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 /// directly add them, because expansion might result in multiple MBB's for one
 /// BB.  As such, the start of the BB might correspond to a different MBB than
 /// the end.
-///
 void
 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
   const TerminatorInst *TI = LLVMBB->getTerminator();
@@ -9249,10 +9352,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
 
   BitTestInfo BTI;
   std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
-    // Sort by probability first, number of bits second.
+    // Sort by probability first, number of bits second, bit mask third.
     if (a.ExtraProb != b.ExtraProb)
       return a.ExtraProb > b.ExtraProb;
-    return a.Bits > b.Bits;
+    if (a.Bits != b.Bits)
+      return a.Bits > b.Bits;
+    return a.Mask < b.Mask;
   });
 
   for (auto &CB : CBV) {
@@ -9441,10 +9546,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
   }
 
   if (TM.getOptLevel() != CodeGenOpt::None) {
-    // Order cases by probability so the most likely case will be checked first.
+    // Here, we order cases by probability so the most likely case will be
+    // checked first. However, two clusters can have the same probability in
+    // which case their relative ordering is non-deterministic. So we use Low
+    // as a tie-breaker as clusters are guaranteed to never overlap.
     std::sort(W.FirstCluster, W.LastCluster + 1,
               [](const CaseCluster &a, const CaseCluster &b) {
-      return a.Prob > b.Prob;
+      return a.Prob != b.Prob ?
+             a.Prob > b.Prob :
+             a.Low->getValue().slt(b.Low->getValue());
     });
 
     // Rearrange the case blocks so that the last one falls through if possible
@@ -9570,8 +9680,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
         }
 
         // The false probability is the sum of all unhandled cases.
-        CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
-                     UnhandledProbs);
+        CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+                     getCurSDLoc(), I->Prob, UnhandledProbs);
 
         if (CurMBB == SwitchMBB)
           visitSwitchCase(CB, SwitchMBB);
@@ -9627,7 +9737,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
     I++;
   }
 
-  for (;;) {
+  while (true) {
     // Our binary search tree differs from a typical BST in that ours can have up
     // to three values in each leaf. The pivot selection above doesn't take that
     // into account, which means the tree might require more nodes and be less
@@ -9722,7 +9832,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
 
   // Create the CaseBlock record that will be used to lower the branch.
   CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
-               LeftProb, RightProb);
+               getCurSDLoc(), LeftProb, RightProb);
 
   if (W.MBB == SwitchMBB)
     visitSwitchCase(CB, SwitchMBB);
@@ -9730,6 +9840,76 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
     SwitchCases.push_back(CB);
 }
 
+// Scale CaseProb after peeling a case with the probablity of PeeledCaseProb
+// from the swith statement.
+static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
+                                            BranchProbability PeeledCaseProb) {
+  if (PeeledCaseProb == BranchProbability::getOne())
+    return BranchProbability::getZero();
+  BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+  uint32_t Numerator = CaseProb.getNumerator();
+  uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+  return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return current MachineBasicBlock for the switch statement if the peeling
+// does not occur.
+// If the peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+    const SwitchInst &SI, CaseClusterVector &Clusters,
+    BranchProbability &PeeledCaseProb) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  // Don't perform if there is only one cluster or optimizing for size.
+  if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+      TM.getOptLevel() == CodeGenOpt::None ||
+      SwitchMBB->getParent()->getFunction().optForMinSize())
+    return SwitchMBB;
+
+  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+  unsigned PeeledCaseIndex = 0;
+  bool SwitchPeeled = false;
+  for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+    CaseCluster &CC = Clusters[Index];
+    if (CC.Prob < TopCaseProb)
+      continue;
+    TopCaseProb = CC.Prob;
+    PeeledCaseIndex = Index;
+    SwitchPeeled = true;
+  }
+  if (!SwitchPeeled)
+    return SwitchMBB;
+
+  DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
+               << "\n");
+
+  // Record the MBB for the peeled switch statement.
+  MachineFunction::iterator BBI(SwitchMBB);
+  ++BBI;
+  MachineBasicBlock *PeeledSwitchMBB =
+      FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+  FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+  ExportFromCurrentBlock(SI.getCondition());
+  auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+  SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+                          nullptr,   nullptr,      TopCaseProb.getCompl()};
+  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+  Clusters.erase(PeeledCaseIt);
+  for (CaseCluster &CC : Clusters) {
+    DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
+                 << CC.Prob << "\n");
+    CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
+    DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+  }
+  PeeledCaseProb = TopCaseProb;
+  return PeeledSwitchMBB;
+}
+
 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   // Extract cases from the switch.
   BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -9783,9 +9963,15 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     }
   }
 
+  // The branch probablity of the peeled case.
+  BranchProbability PeeledCaseProb = BranchProbability::getZero();
+  MachineBasicBlock *PeeledSwitchMBB =
+      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
   // If there is only the default destination, jump there directly.
   MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
   if (Clusters.empty()) {
+    assert(PeeledSwitchMBB == SwitchMBB);
     SwitchMBB->addSuccessor(DefaultMBB);
     if (DefaultMBB != NextBlock(SwitchMBB)) {
       DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
@@ -9817,8 +10003,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   SwitchWorkList WorkList;
   CaseClusterIt First = Clusters.begin();
   CaseClusterIt Last = Clusters.end() - 1;
-  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
-  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branchprobability for DefaultMBB if the peel occurs and
+  // DefaultMBB is not replaced.
+  if (PeeledCaseProb != BranchProbability::getZero() &&
+      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+    DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
+  WorkList.push_back(
+      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
 
   while (!WorkList.empty()) {
     SwitchWorkListItem W = WorkList.back();
@@ -9826,7 +10018,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
 
     if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
-        !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
+        !DefaultMBB->getParent()->getFunction().optForMinSize()) {
       // For optimized builds, lower large range as a balanced binary tree.
       splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
       continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ac1d6aae65a5..9e7c2bc6821b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,67 +16,75 @@
 
 #include "StatepointLowering.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Statepoint.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
 #include <utility>
 #include <vector>
 
 namespace llvm {
 
-class AddrSpaceCastInst;
 class AllocaInst;
+class AtomicCmpXchgInst;
+class AtomicRMWInst;
 class BasicBlock;
-class BitCastInst;
 class BranchInst;
 class CallInst;
+class CatchPadInst;
+class CatchReturnInst;
+class CatchSwitchInst;
+class CleanupPadInst;
+class CleanupReturnInst;
+class Constant;
+class ConstantInt;
+class ConstrainedFPIntrinsic;
 class DbgValueInst;
-class ExtractElementInst;
-class FCmpInst;
-class FPExtInst;
-class FPToSIInst;
-class FPToUIInst;
-class FPTruncInst;
-class Function;
+class DataLayout;
+class DIExpression;
+class DILocalVariable;
+class DILocation;
+class FenceInst;
 class FunctionLoweringInfo;
-class GetElementPtrInst;
 class GCFunctionInfo;
-class ICmpInst;
-class IntToPtrInst;
+class GCRelocateInst;
+class GCResultInst;
 class IndirectBrInst;
 class InvokeInst;
-class InsertElementInst;
-class Instruction;
+class LandingPadInst;
+class LLVMContext;
 class LoadInst;
 class MachineBasicBlock;
-class MachineInstr;
-class MachineRegisterInfo;
-class MDNode;
-class MVT;
 class PHINode;
-class PtrToIntInst;
+class ResumeInst;
 class ReturnInst;
 class SDDbgValue;
-class SExtInst;
-class SelectInst;
-class ShuffleVectorInst;
-class SIToFPInst;
 class StoreInst;
 class SwitchInst;
-class DataLayout;
 class TargetLibraryInfo;
-class TargetLowering;
-class TruncInst;
-class UIToFPInst;
-class UnreachableInst;
+class TargetMachine;
+class Type;
 class VAArgInst;
-class ZExtInst;
+class UnreachableInst;
+class Use;
+class User;
+class Value;
 
 //===----------------------------------------------------------------------===//
 /// SelectionDAGBuilder - This is the common target-independent lowering
@@ -84,7 +92,7 @@ class ZExtInst;
 ///
 class SelectionDAGBuilder {
   /// CurInst - The current instruction being visited
-  const Instruction *CurInst;
+  const Instruction *CurInst = nullptr;
 
   DenseMap<const Value*, SDValue> NodeMap;
 
@@ -94,13 +102,15 @@ class SelectionDAGBuilder {
 
   /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
   class DanglingDebugInfo {
-    const DbgValueInst* DI;
+    const DbgValueInst* DI = nullptr;
     DebugLoc dl;
-    unsigned SDNodeOrder;
+    unsigned SDNodeOrder = 0;
+
   public:
-    DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
+    DanglingDebugInfo() = default;
     DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
         : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
+
     const DbgValueInst* getDI() { return DI; }
     DebugLoc getdl() { return dl; }
     unsigned getSDNodeOrder() { return SDNodeOrder; }
@@ -120,8 +130,8 @@ public:
   /// State used while lowering a statepoint sequence (gc_statepoint,
   /// gc_relocate, and gc_result).  See StatepointLowering.hpp/cpp for details.
   StatepointLoweringState StatepointLowering;
-private:
 
+private:
   /// PendingExports - CopyToReg nodes that copy values to virtual registers
   /// for export to other blocks need to be emitted before any terminator
   /// instruction, but they have no other ordering requirements. We bunch them
@@ -189,23 +199,22 @@ private:
     }
   };
 
-  typedef std::vector<CaseCluster> CaseClusterVector;
-  typedef CaseClusterVector::iterator CaseClusterIt;
+  using CaseClusterVector = std::vector<CaseCluster>;
+  using CaseClusterIt = CaseClusterVector::iterator;
 
   struct CaseBits {
-    uint64_t Mask;
-    MachineBasicBlock* BB;
-    unsigned Bits;
+    uint64_t Mask = 0;
+    MachineBasicBlock* BB = nullptr;
+    unsigned Bits = 0;
     BranchProbability ExtraProb;
 
+    CaseBits() = default;
     CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
              BranchProbability Prob):
-      Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
-
-    CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
+      Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
   };
 
-  typedef std::vector<CaseBits> CaseBitsVector;
+  using CaseBitsVector = std::vector<CaseBits>;
 
   /// Sort Clusters and merge adjacent cases.
   void sortAndRangeify(CaseClusterVector &Clusters);
@@ -214,15 +223,6 @@ private:
   /// SelectionDAGBuilder and SDISel for the code generation of additional basic
   /// blocks needed by multi-case switch statements.
   struct CaseBlock {
-    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
-              const Value *cmpmiddle, MachineBasicBlock *truebb,
-              MachineBasicBlock *falsebb, MachineBasicBlock *me,
-              BranchProbability trueprob = BranchProbability::getUnknown(),
-              BranchProbability falseprob = BranchProbability::getUnknown())
-        : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
-          TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
-          FalseProb(falseprob) {}
-
     // CC - the condition code to use for the case block's setcc node
     ISD::CondCode CC;
 
@@ -237,14 +237,25 @@ private:
     // ThisBB - the block into which to emit the code for the setcc and branches
     MachineBasicBlock *ThisBB;
 
+    /// The debug location of the instruction this CaseBlock was
+    /// produced from.
+    SDLoc DL;
+
     // TrueProb/FalseProb - branch weights.
     BranchProbability TrueProb, FalseProb;
+
+    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+              const Value *cmpmiddle, MachineBasicBlock *truebb,
+              MachineBasicBlock *falsebb, MachineBasicBlock *me,
+              SDLoc dl,
+              BranchProbability trueprob = BranchProbability::getUnknown(),
+              BranchProbability falseprob = BranchProbability::getUnknown())
+        : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+          TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
+          TrueProb(trueprob), FalseProb(falseprob) {}
   };
 
   struct JumpTable {
-    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
-              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-
     /// Reg - the virtual register containing the index of the jump table entry
     //. to jump to.
     unsigned Reg;
@@ -255,39 +266,38 @@ private:
     /// Default - the MBB of the default bb, which is a successor of the range
     /// check MBB.  This is when updating PHI nodes in successors.
     MachineBasicBlock *Default;
+
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
   };
   struct JumpTableHeader {
-    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
-                    bool E = false)
-        : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
-          Emitted(E) {}
     APInt First;
     APInt Last;
     const Value *SValue;
     MachineBasicBlock *HeaderBB;
     bool Emitted;
+
+    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+                    bool E = false)
+        : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+          Emitted(E) {}
   };
-  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+  using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
 
   struct BitTestCase {
-    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
-                BranchProbability Prob):
-      Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
     uint64_t Mask;
     MachineBasicBlock *ThisBB;
     MachineBasicBlock *TargetBB;
     BranchProbability ExtraProb;
+
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+                BranchProbability Prob):
+      Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
   };
 
-  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+  using BitTestInfo = SmallVector<BitTestCase, 3>;
 
   struct BitTestBlock {
-    BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
-                 bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
-                 BitTestInfo C, BranchProbability Pr)
-        : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
-          RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
-          Cases(std::move(C)), Prob(Pr) {}
     APInt First;
     APInt Range;
     const Value *SValue;
@@ -300,6 +310,13 @@ private:
     BitTestInfo Cases;
     BranchProbability Prob;
     BranchProbability DefaultProb;
+
+    BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+                 bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+                 BitTestInfo C, BranchProbability Pr)
+        : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+          RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+          Cases(std::move(C)), Prob(Pr) {}
   };
 
   /// Return the range of value in [First..Last].
@@ -336,7 +353,7 @@ private:
     const ConstantInt *LT;
     BranchProbability DefaultProb;
   };
-  typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+  using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
 
   /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
   /// than each cluster in the range, its rank is 0.
@@ -352,6 +369,10 @@ private:
                      MachineBasicBlock *SwitchMBB,
                      MachineBasicBlock *DefaultMBB);
 
+  /// Peel the top probability case if it exceeds the threshold
+  MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
+                                             CaseClusterVector &Clusters,
+                                             BranchProbability &PeeledCaseProb);
 
   /// A class which encapsulates all of the information needed to generate a
   /// stack protector check and signals to isel via its state being initialized
@@ -466,8 +487,7 @@ private:
   ///        the same function, use the same failure basic block).
   class StackProtectorDescriptor {
   public:
-    StackProtectorDescriptor()
-        : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {}
+    StackProtectorDescriptor() = default;
 
     /// Returns true if all fields of the stack protector descriptor are
     /// initialized implying that we should/are ready to emit a stack protector.
@@ -533,15 +553,15 @@ private:
     /// replace it with a compare/branch to the successor mbbs
     /// SuccessMBB/FailureMBB depending on whether or not the stack protector
     /// was violated.
-    MachineBasicBlock *ParentMBB;
+    MachineBasicBlock *ParentMBB = nullptr;
 
     /// A basic block visited on stack protector check success that contains the
     /// terminators of ParentMBB.
-    MachineBasicBlock *SuccessMBB;
+    MachineBasicBlock *SuccessMBB = nullptr;
 
     /// This basic block visited on stack protector check failure that will
     /// contain a call to __stack_chk_fail().
-    MachineBasicBlock *FailureMBB;
+    MachineBasicBlock *FailureMBB = nullptr;
 
     /// Add a successor machine basic block to ParentMBB. If the successor mbb
     /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
@@ -554,25 +574,29 @@ private:
 
 private:
   const TargetMachine &TM;
+
 public:
   /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
   /// nodes without a corresponding SDNode.
   static const unsigned LowestSDNodeOrder = 1;
 
   SelectionDAG &DAG;
-  const DataLayout *DL;
-  AliasAnalysis *AA;
+  const DataLayout *DL = nullptr;
+  AliasAnalysis *AA = nullptr;
   const TargetLibraryInfo *LibInfo;
 
   /// SwitchCases - Vector of CaseBlock structures used to communicate
   /// SwitchInst code generation information.
   std::vector<CaseBlock> SwitchCases;
+
   /// JTCases - Vector of JumpTable structures used to communicate
   /// SwitchInst code generation information.
   std::vector<JumpTableBlock> JTCases;
+
   /// BitTestCases - Vector of BitTestBlock structures used to communicate
   /// SwitchInst code generation information.
   std::vector<BitTestBlock> BitTestCases;
+
   /// A StackProtectorDescriptor structure used to communicate stack protector
   /// information in between SelectBasicBlock and FinishBasicBlock.
   StackProtectorDescriptor SPDescriptor;
@@ -589,22 +613,19 @@ public:
   GCFunctionInfo *GFI;
 
   /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
-  DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+  DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
 
   /// HasTailCall - This is set to true if a call in the current
   /// block has been translated as a tail call. In this case,
   /// no subsequent DAG nodes should be created.
-  ///
-  bool HasTailCall;
+  bool HasTailCall = false;
 
   LLVMContext *Context;
 
   SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
                       CodeGenOpt::Level ol)
-    : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
-      DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
-      HasTailCall(false) {
-  }
+    : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+      FuncInfo(funcinfo) {}
 
   void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
             const TargetLibraryInfo *li);
@@ -653,6 +674,7 @@ public:
   // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
   // generate the debug data structures now that we've seen its definition.
   void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+
   SDValue getValue(const Value *V);
   bool findValue(const Value *V) const;
 
@@ -923,13 +945,12 @@ private:
 
   void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
 
-  /// EmitFuncArgumentDbgValue - If V is an function argument then create
-  /// corresponding DBG_VALUE machine instruction for it now. At the end of
-  /// instruction selection, they will be inserted to the entry BB.
+  /// If V is an function argument then create corresponding DBG_VALUE machine
+  /// instruction for it now. At the end of instruction selection, they will be
+  /// inserted to the entry BB.
   bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
                                 DIExpression *Expr, DILocation *DL,
-                                int64_t Offset, bool IsDbgDeclare,
-                                const SDValue &N);
+                                bool IsDbgDeclare, const SDValue &N);
 
   /// Return the next block after MBB, or nullptr if there is none.
   MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
@@ -940,8 +961,8 @@ private:
 
   /// Return the appropriate SDDbgValue based on N.
   SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
-                          DIExpression *Expr, int64_t Offset,
-                          const DebugLoc &dl, unsigned DbgSDNodeOrder);
+                          DIExpression *Expr, const DebugLoc &dl,
+                          unsigned DbgSDNodeOrder);
 };
 
 /// RegsForValue - This struct represents the registers (physical or virtual)
@@ -978,13 +999,11 @@ struct RegsForValue {
 
   /// Records if this value needs to be treated in an ABI dependant manner,
   /// different to normal type legalization.
-  bool IsABIMangled;
-
-  RegsForValue();
+  bool IsABIMangled = false;
 
+  RegsForValue() = default;
   RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
                bool IsABIMangledValue = false);
-
   RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                const DataLayout &DL, unsigned Reg, Type *Ty,
                bool IsABIMangledValue = false);
@@ -1024,4 +1043,4 @@ struct RegsForValue {
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 3dd58975b1f1..dd30dc16378c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,24 +11,42 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdint>
+#include <iterator>
+
 using namespace llvm;
 
 static cl::opt<bool>
@@ -385,6 +403,7 @@ static Printable PrintNodeId(const SDNode &Node) {
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+
 LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
   print(dbgs(), G);
   dbgs() << '\n';
@@ -402,6 +421,36 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
 }
 
 void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+  if (getFlags().hasNoUnsignedWrap())
+    OS << " nuw";
+
+  if (getFlags().hasNoSignedWrap())
+    OS << " nsw";
+
+  if (getFlags().hasExact())
+    OS << " exact";
+
+  if (getFlags().hasUnsafeAlgebra())
+    OS << " unsafe";
+
+  if (getFlags().hasNoNaNs())
+    OS << " nnan";
+
+  if (getFlags().hasNoInfs())
+    OS << " ninf";
+
+  if (getFlags().hasNoSignedZeros())
+    OS << " nsz";
+
+  if (getFlags().hasAllowReciprocal())
+    OS << " arcp";
+
+  if (getFlags().hasAllowContract())
+    OS << " contract";
+
+  if (getFlags().hasVectorReduction())
+    OS << " vector-reduction";
+
   if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
     if (!MN->memoperands_empty()) {
       OS << "<";
@@ -429,9 +478,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
     OS << '<' << CSDN->getAPIntValue() << '>';
   } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
-    if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle())
+    if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle())
       OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
-    else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble())
+    else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble())
       OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
     else {
       OS << "<APFloat(";
@@ -479,7 +528,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << LBB->getName() << " ";
     OS << (const void*)BBDN->getBasicBlock() << ">";
   } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
-    OS << ' ' << PrintReg(R->getReg(),
+    OS << ' ' << printReg(R->getReg(),
                           G ? G->getSubtarget().getRegisterInfo() : nullptr);
   } else if (const ExternalSymbolSDNode *ES =
              dyn_cast<ExternalSymbolSDNode>(this)) {
@@ -640,7 +689,8 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
+using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>;
+
 static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
                        const SelectionDAG *G, VisitedSDNodeSet &once) {
   if (!once.insert(N).second) // If we've been here before, return now.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bdf57e805842..18f6997ef83c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "ScheduleDAGSDNodes.h"
 #include "SelectionDAGBuilder.h"
 #include "llvm/ADT/APInt.h"
@@ -26,7 +27,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -45,9 +46,12 @@
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
@@ -80,13 +84,9 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
 #include <cassert>
@@ -212,7 +212,7 @@ namespace llvm {
       IS.OptLevel = NewOptLevel;
       IS.TM.setOptLevel(NewOptLevel);
       DEBUG(dbgs() << "\nChanging optimization level for Function "
-            << IS.MF->getFunction()->getName() << "\n");
+            << IS.MF->getFunction().getName() << "\n");
       DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
             << " ; After: -O" << NewOptLevel << "\n");
       SavedFastISel = IS.TM.Options.EnableFastISel;
@@ -228,7 +228,7 @@ namespace llvm {
       if (IS.OptLevel == SavedOptLevel)
         return;
       DEBUG(dbgs() << "\nRestoring optimization level for Function "
-            << IS.MF->getFunction()->getName() << "\n");
+            << IS.MF->getFunction().getName() << "\n");
       DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
             << " ; After: -O" << SavedOptLevel << "\n");
       IS.OptLevel = SavedOptLevel;
@@ -384,7 +384,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
          "-fast-isel-abort > 0 requires -fast-isel");
 
-  const Function &Fn = *mf.getFunction();
+  const Function &Fn = mf.getFunction();
   MF = &mf;
 
   // Reset the target options before resetting the optimization
@@ -414,7 +414,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
 
   SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
 
-  CurDAG->init(*MF, *ORE);
+  CurDAG->init(*MF, *ORE, this);
   FuncInfo->set(Fn, *MF, CurDAG);
 
   // Now get the optional analyzes if we want to.
@@ -494,10 +494,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
 
   DenseMap<unsigned, unsigned> LiveInMap;
   if (!FuncInfo->ArgDbgValues.empty())
-    for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
-           E = RegInfo->livein_end(); LI != E; ++LI)
-      if (LI->second)
-        LiveInMap.insert(std::make_pair(LI->first, LI->second));
+    for (std::pair<unsigned, unsigned> LI : RegInfo->liveins())
+      if (LI.second)
+        LiveInMap.insert(LI);
 
   // Insert DBG_VALUE instructions for function arguments to the entry block.
   for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
@@ -529,12 +528,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
       const MDNode *Expr = MI->getDebugExpression();
       DebugLoc DL = MI->getDebugLoc();
       bool IsIndirect = MI->isIndirectDebugValue();
-      unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+      if (IsIndirect)
+        assert(MI->getOperand(1).getImm() == 0 &&
+               "DBG_VALUE with nonzero offset");
       assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
              "Expected inlined-at fields to agree");
       // Def is never a terminator here, so it is ok to increment InsertPos.
       BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
-              IsIndirect, LDI->second, Offset, Variable, Expr);
+              IsIndirect, LDI->second, Variable, Expr);
 
       // If this vreg is directly copied into an exported register then
       // that COPY instructions also need DBG_VALUE, if it is the only
@@ -556,7 +557,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
         // declared, rather than whatever is attached to CopyUseMI.
         MachineInstr *NewMI =
             BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
-                    CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
+                    CopyUseMI->getOperand(0).getReg(), Variable, Expr);
         MachineBasicBlock::iterator Pos = CopyUseMI;
         EntryMBB->insertAfter(Pos, NewMI);
       }
@@ -644,6 +645,9 @@ static void reportFastISelFailure(MachineFunction &MF,
 void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
                                         BasicBlock::const_iterator End,
                                         bool &HadTailCall) {
+  // Allow creating illegal types during DAG building for the basic block.
+  CurDAG->NewNodesMustHaveLegalTypes = false;
+
   // Lower the instructions. If a call is emitted as a tail call, cease emitting
   // nodes for this block.
   for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
@@ -726,8 +730,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     BlockName =
         (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
   }
-  DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
-        << " '" << BlockName << "'\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB)
+               << " '" << BlockName << "'\n";
+        CurDAG->dump());
 
   if (ViewDAGCombine1 && MatchFilterBB)
     CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -739,8 +744,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
   }
 
-  DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
-        << " '" << BlockName << "'\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Optimized lowered selection DAG: "
+               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+               << "'\n";
+        CurDAG->dump());
 
   // Second step, hack on the DAG until it only uses operations and types that
   // the target supports.
@@ -754,8 +761,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     Changed = CurDAG->LegalizeTypes();
   }
 
-  DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
-        << " '" << BlockName << "'\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Type-legalized selection DAG: "
+               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+               << "'\n";
+        CurDAG->dump());
 
   // Only allow creation of legal node types.
   CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -771,8 +780,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
     }
 
-    DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
-          << " '" << BlockName << "'\n"; CurDAG->dump());
+    DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                 << "'\n";
+          CurDAG->dump());
   }
 
   {
@@ -782,8 +793,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   }
 
   if (Changed) {
-    DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber
-          << " '" << BlockName << "'\n"; CurDAG->dump());
+    DEBUG(dbgs() << "Vector-legalized selection DAG: "
+                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                 << "'\n";
+          CurDAG->dump());
 
     {
       NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
@@ -791,8 +804,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->LegalizeTypes();
     }
 
-    DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber
-          << " '" << BlockName << "'\n"; CurDAG->dump());
+    DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                 << "'\n";
+          CurDAG->dump());
 
     if (ViewDAGCombineLT && MatchFilterBB)
       CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -804,8 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
     }
 
-    DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
-          << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+    DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                 << "'\n";
+          CurDAG->dump());
   }
 
   if (ViewLegalizeDAGs && MatchFilterBB)
@@ -817,8 +834,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Legalize();
   }
 
-  DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
-        << " '" << BlockName << "'\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Legalized selection DAG: "
+               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+               << "'\n";
+        CurDAG->dump());
 
   if (ViewDAGCombine2 && MatchFilterBB)
     CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -830,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
   }
 
-  DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
-        << " '" << BlockName << "'\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Optimized legalized selection DAG: "
+               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+               << "'\n";
+        CurDAG->dump());
 
   if (OptLevel != CodeGenOpt::None)
     ComputeLiveOutVRegInfo();
@@ -847,8 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     DoInstructionSelection();
   }
 
-  DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
-        << " '" << BlockName << "'\n"; CurDAG->dump());
+  DEBUG(dbgs() << "Selected selection DAG: "
+               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+               << "'\n";
+        CurDAG->dump());
 
   if (ViewSchedDAGs && MatchFilterBB)
     CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -915,9 +938,9 @@ public:
 } // end anonymous namespace
 
 void SelectionDAGISel::DoInstructionSelection() {
-  DEBUG(dbgs() << "===== Instruction selection begins: BB#"
-        << FuncInfo->MBB->getNumber()
-        << " '" << FuncInfo->MBB->getName() << "'\n");
+  DEBUG(dbgs() << "===== Instruction selection begins: "
+               << printMBBReference(*FuncInfo->MBB) << " '"
+               << FuncInfo->MBB->getName() << "'\n");
 
   PreprocessISelDAG();
 
@@ -1138,7 +1161,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
 
       // Look through casts and constant offset GEPs. These mostly come from
       // inalloca.
-      APInt Offset(DL.getPointerSizeInBits(0), 0);
+      APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
       Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
 
       // Check if the variable is a static alloca or a byval or inalloca
@@ -1177,12 +1200,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
 
   // For each machine basic block in reverse post order.
   ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
-  for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
-           It = RPOT.begin(),
-           E = RPOT.end();
-       It != E; ++It) {
-    MachineBasicBlock *MBB = *It;
-
+  for (MachineBasicBlock *MBB : RPOT) {
     // For each swifterror value in the function.
     for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
       auto Key = std::make_pair(MBB, SwiftErrorVal);
@@ -1253,6 +1271,8 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
       // If we don't need a phi create a copy to the upward exposed vreg.
       if (!needPHI) {
         assert(UpwardsUse);
+        assert(!VRegs.empty() &&
+               "No predecessors?  Is the Calling Convention correct?");
         unsigned DestReg = UUseVReg;
         BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
                 DestReg)
@@ -1282,10 +1302,10 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
   }
 }
 
-void preassignSwiftErrorRegs(const TargetLowering *TLI,
-                             FunctionLoweringInfo *FuncInfo,
-                             BasicBlock::const_iterator Begin,
-                             BasicBlock::const_iterator End) {
+static void preassignSwiftErrorRegs(const TargetLowering *TLI,
+                                    FunctionLoweringInfo *FuncInfo,
+                                    BasicBlock::const_iterator Begin,
+                                    BasicBlock::const_iterator End) {
   if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
     return;
 
@@ -2774,6 +2794,12 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
     Result = !::CheckType(Table, Index, N, SDISel.TLI,
                           SDISel.CurDAG->getDataLayout());
     return Index;
+  case SelectionDAGISel::OPC_CheckTypeRes: {
+    unsigned Res = Table[Index++];
+    Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI,
+                          SDISel.CurDAG->getDataLayout());
+    return Index;
+  }
   case SelectionDAGISel::OPC_CheckChild0Type:
   case SelectionDAGISel::OPC_CheckChild1Type:
   case SelectionDAGISel::OPC_CheckChild2Type:
@@ -2906,6 +2932,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
   case ISD::CopyFromReg:
   case ISD::CopyToReg:
   case ISD::EH_LABEL:
+  case ISD::ANNOTATION_LABEL:
   case ISD::LIFETIME_START:
   case ISD::LIFETIME_END:
     NodeToMatch->setNodeId(-1); // Mark selected.
@@ -3175,6 +3202,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
         break;
       continue;
 
+    case OPC_CheckTypeRes: {
+      unsigned Res = MatcherTable[MatcherIndex++];
+      if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI,
+                       CurDAG->getDataLayout()))
+        break;
+      continue;
+    }
+
     case OPC_SwitchOpcode: {
       unsigned CurNodeOpcode = N.getOpcode();
       unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -3548,6 +3583,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
                "NodeToMatch was removed partway through selection");
         SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
                                                               SDNode *E) {
+          CurDAG->salvageDebugInfo(*N);
           auto &Chain = ChainNodesMatched;
           assert((!E || !is_contained(Chain, N)) &&
                  "Chain node replaced during MorphNode");
@@ -3725,6 +3761,25 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
   }
 }
 
+bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
+  assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C)
+    return false;
+
+  // Detect when "or" is used to add an offset to a stack object.
+  if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
+    MachineFrameInfo &MFI = MF->getFrameInfo();
+    unsigned A = MFI.getObjectAlignment(FN->getIndex());
+    assert(isPowerOf2_32(A) && "Unexpected alignment");
+    int32_t Off = C->getSExtValue();
+    // If the alleged offset fits in the zero bits guaranteed by
+    // the alignment, then this or is really an add.
+    return (Off >= 0) && (((A - 1) & Off) == unsigned(Off));
+  }
+  return false;
+}
+
 void SelectionDAGISel::CannotYetSelect(SDNode *N) {
   std::string msg;
   raw_string_ostream Msg(msg);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 11561dfa5947..be4ab094bf49 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -16,15 +16,13 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "dag-printer"
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 55f70f7d9fd3..3a283bc5fdc0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===//
+//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
 using namespace llvm;
 
-SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {}
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default;
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5d78bba86d73..3f64b49e3555 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1,4 +1,4 @@
-//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,21 +14,44 @@
 
 #include "StatepointLowering.h"
 #include "SelectionDAGBuilder.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Statepoint.h"
-#include "llvm/Target/TargetLowering.h"
-#include <algorithm>
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "statepoint-lowering"
@@ -73,7 +96,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
   NumSlotsAllocatedForStatepoints++;
   MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
 
-  unsigned SpillSize = ValueType.getSizeInBits() / 8;
+  unsigned SpillSize = ValueType.getStoreSize();
   assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
 
   // First look for a previously created stack slot which is not in
@@ -200,7 +223,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
 /// values on the stack between calls.
 static void reservePreviousStackSlotForValue(const Value *IncomingValue,
                                              SelectionDAGBuilder &Builder) {
-
   SDValue Incoming = Builder.getValue(IncomingValue);
 
   if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
@@ -292,7 +314,6 @@ removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
 static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
     SelectionDAGBuilder::StatepointLoweringInfo &SI,
     SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
-
   SDValue ReturnValue, CallEndVal;
   std::tie(ReturnValue, CallEndVal) =
       Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
index b043184003a0..372c82a359f6 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -1,4 +1,4 @@
-//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,11 +16,16 @@
 #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include <cassert>
 
 namespace llvm {
+
+class CallInst;
 class SelectionDAGBuilder;
 
 /// This class tracks both per-statepoint and per-selectiondag information.
@@ -30,7 +35,7 @@ class SelectionDAGBuilder;
 /// works in concert with information in FunctionLoweringInfo.
 class StatepointLoweringState {
 public:
-  StatepointLoweringState() : NextSlotToAllocate(0) {}
+  StatepointLoweringState() = default;
 
   /// Reset all state tracking for a newly encountered safepoint.  Also
   /// performs some consistency checking.
@@ -69,7 +74,7 @@ public:
   /// before the next statepoint.  If we weren't expecting to see
   /// it, we'll report an assertion.
   void relocCallVisited(const CallInst &RelocCall) {
-    auto I = find(PendingGCRelocateCalls, &RelocCall);
+    auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
     assert(I != PendingGCRelocateCalls.end() &&
            "Visited unexpected gcrelocate call");
     PendingGCRelocateCalls.erase(I);
@@ -108,11 +113,12 @@ private:
   SmallBitVector AllocatedStackSlots;
 
   /// Points just beyond the last slot known to have been allocated
-  unsigned NextSlotToAllocate;
+  unsigned NextSlotToAllocate = 0;
 
   /// Keep track of pending gcrelocate calls for consistency check
   SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
 };
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8652df7bbd70..58276052c10b 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -20,6 +20,9 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -29,10 +32,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cctype>
 using namespace llvm;
 
@@ -52,11 +52,11 @@ bool TargetLowering::isPositionIndependent() const {
 /// so, it sets Chain to the input chain of the tail call.
 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                           SDValue &Chain) const {
-  const Function *F = DAG.getMachineFunction().getFunction();
+  const Function &F = DAG.getMachineFunction().getFunction();
 
   // Conservatively require the attributes of the call to match those of
   // the return. Ignore noalias because it doesn't affect the call sequence.
-  AttributeList CallerAttrs = F->getAttributes();
+  AttributeList CallerAttrs = F.getAttributes();
   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
           .removeAttribute(Attribute::NoAlias)
           .hasAttributes())
@@ -408,7 +408,7 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
   // Search for the smallest integer type with free casts to and from
   // Op's type. For expedience, just check power-of-2 integer types.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+  unsigned DemandedSize = Demanded.getActiveBits();
   unsigned SmallVTBits = DemandedSize;
   if (!isPowerOf2_32(SmallVTBits))
     SmallVTBits = NextPowerOf2(SmallVTBits);
@@ -421,9 +421,8 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
           Op.getOpcode(), dl, SmallVT,
           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
-      bool NeedZext = DemandedSize > SmallVTBits;
-      SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
-                              dl, Op.getValueType(), X);
+      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
+      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
       return TLO.CombineTo(Op, Z);
     }
   }
@@ -459,7 +458,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
   // If Old has more than one use then it must be Op, because the
   // AssumeSingleUse flag is not propogated to recursive calls of
   // SimplifyDemanded bits, so the only node with multiple use that
-  // it will attempt to combine will be opt.
+  // it will attempt to combine will be Op.
   assert(TLO.Old == Op);
 
   SmallVector <SDValue, 4> NewOps;
@@ -470,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
     }
     NewOps.push_back(User->getOperand(i));
   }
-  TLO.DAG.UpdateNodeOperands(User, NewOps);
+  User = TLO.DAG.UpdateNodeOperands(User, NewOps);
   // Op has less users now, so we may be able to perform additional combines
   // with it.
   DCI.AddToWorklist(Op.getNode());
@@ -480,7 +479,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
   return true;
 }
 
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                                           DAGCombinerInfo &DCI) const {
 
   SelectionDAG &DAG = DCI.DAG;
@@ -517,6 +516,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   // Don't know anything.
   Known = KnownBits(BitWidth);
 
+  if (Op.getOpcode() == ISD::Constant) {
+    // We know all of the bits for a constant!
+    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+    Known.Zero = ~Known.One;
+    return false;
+  }
+
   // Other users may use these bits.
   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
     if (Depth != 0) {
@@ -539,11 +545,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
   KnownBits Known2, KnownOut;
   switch (Op.getOpcode()) {
-  case ISD::Constant:
-    // We know all of the bits for a constant!
-    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
-    Known.Zero = ~Known.One;
-    return false;   // Don't fall through, will infinitely loop.
   case ISD::BUILD_VECTOR:
     // Collect the known bits that are shared by every constant vector element.
     Known.Zero.setAllBits(); Known.One.setAllBits();
@@ -780,33 +781,38 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     break;
   }
   case ISD::SHL:
-    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-      unsigned ShAmt = SA->getZExtValue();
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
       SDValue InOp = Op.getOperand(0);
 
       // If the shift count is an invalid immediate, don't do anything.
-      if (ShAmt >= BitWidth)
+      if (SA->getAPIntValue().uge(BitWidth))
         break;
 
+      unsigned ShAmt = SA->getZExtValue();
+
       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
       // single shift.  We can do this if the bottom bits (which are shifted
       // out) are never demanded.
-      if (InOp.getOpcode() == ISD::SRL &&
-          isa<ConstantSDNode>(InOp.getOperand(1))) {
-        if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
-          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
-          unsigned Opc = ISD::SHL;
-          int Diff = ShAmt-C1;
-          if (Diff < 0) {
-            Diff = -Diff;
-            Opc = ISD::SRL;
-          }
+      if (InOp.getOpcode() == ISD::SRL) {
+        if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+          if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+            if (SA2->getAPIntValue().ult(BitWidth)) {
+              unsigned C1 = SA2->getZExtValue();
+              unsigned Opc = ISD::SHL;
+              int Diff = ShAmt-C1;
+              if (Diff < 0) {
+                Diff = -Diff;
+                Opc = ISD::SRL;
+              }
 
-          SDValue NewSA =
-            TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
-          EVT VT = Op.getValueType();
-          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
-                                                   InOp.getOperand(0), NewSA));
+              SDValue NewSA =
+                TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+              EVT VT = Op.getValueType();
+              return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+                                                       InOp.getOperand(0),
+                                                       NewSA));
+            }
+          }
         }
       }
 
@@ -818,7 +824,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
         SDValue InnerOp = InOp.getOperand(0);
         EVT InnerVT = InnerOp.getValueType();
-        unsigned InnerBits = InnerVT.getSizeInBits();
+        unsigned InnerBits = InnerVT.getScalarSizeInBits();
         if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
           EVT ShTy = getShiftAmountTy(InnerVT, DL);
@@ -837,45 +843,42 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
         // aren't demanded (as above) and that the shifted upper c1 bits of
         // x aren't demanded.
-        if (InOp.hasOneUse() &&
-            InnerOp.getOpcode() == ISD::SRL &&
-            InnerOp.hasOneUse() &&
-            isa<ConstantSDNode>(InnerOp.getOperand(1))) {
-          unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
-            ->getZExtValue();
-          if (InnerShAmt < ShAmt &&
-              InnerShAmt < InnerBits &&
-              NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
-              NewMask.countTrailingZeros() >= ShAmt) {
-            SDValue NewSA =
-              TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
-                                  Op.getOperand(1).getValueType());
-            EVT VT = Op.getValueType();
-            SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
-                                             InnerOp.getOperand(0));
-            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
-                                                     NewExt, NewSA));
+        if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+            InnerOp.hasOneUse()) {
+          if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+            if (InnerShAmt < ShAmt &&
+                InnerShAmt < InnerBits &&
+                NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
+                NewMask.countTrailingZeros() >= ShAmt) {
+              SDValue NewSA =
+                TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+                                    Op.getOperand(1).getValueType());
+              EVT VT = Op.getValueType();
+              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+                                               InnerOp.getOperand(0));
+              return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+                                                       NewExt, NewSA));
+            }
           }
         }
       }
 
-      Known.Zero <<= SA->getZExtValue();
-      Known.One  <<= SA->getZExtValue();
+      Known.Zero <<= ShAmt;
+      Known.One  <<= ShAmt;
       // low bits known zero.
-      Known.Zero.setLowBits(SA->getZExtValue());
+      Known.Zero.setLowBits(ShAmt);
     }
     break;
   case ISD::SRL:
-    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-      EVT VT = Op.getValueType();
-      unsigned ShAmt = SA->getZExtValue();
-      unsigned VTSize = VT.getSizeInBits();
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
       SDValue InOp = Op.getOperand(0);
 
       // If the shift count is an invalid immediate, don't do anything.
-      if (ShAmt >= BitWidth)
+      if (SA->getAPIntValue().uge(BitWidth))
         break;
 
+      unsigned ShAmt = SA->getZExtValue();
       APInt InDemandedMask = (NewMask << ShAmt);
 
       // If the shift is exact, then it does demand the low bits (and knows that
@@ -886,21 +889,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
       // single shift.  We can do this if the top bits (which are shifted out)
       // are never demanded.
-      if (InOp.getOpcode() == ISD::SHL &&
-          isa<ConstantSDNode>(InOp.getOperand(1))) {
-        if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
-          unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
-          unsigned Opc = ISD::SRL;
-          int Diff = ShAmt-C1;
-          if (Diff < 0) {
-            Diff = -Diff;
-            Opc = ISD::SHL;
-          }
+      if (InOp.getOpcode() == ISD::SHL) {
+        if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+          if (ShAmt &&
+              (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+            if (SA2->getAPIntValue().ult(BitWidth)) {
+              unsigned C1 = SA2->getZExtValue();
+              unsigned Opc = ISD::SRL;
+              int Diff = ShAmt-C1;
+              if (Diff < 0) {
+                Diff = -Diff;
+                Opc = ISD::SHL;
+              }
 
-          SDValue NewSA =
-            TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
-          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
-                                                   InOp.getOperand(0), NewSA));
+              SDValue NewSA =
+                TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+              EVT VT = Op.getValueType();
+              return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+                                                       InOp.getOperand(0),
+                                                       NewSA));
+            }
+          }
         }
       }
 
@@ -924,14 +933,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                            TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
                                            Op.getOperand(0), Op.getOperand(1)));
 
-    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
       EVT VT = Op.getValueType();
-      unsigned ShAmt = SA->getZExtValue();
 
       // If the shift count is an invalid immediate, don't do anything.
-      if (ShAmt >= BitWidth)
+      if (SA->getAPIntValue().uge(BitWidth))
         break;
 
+      unsigned ShAmt = SA->getZExtValue();
       APInt InDemandedMask = (NewMask << ShAmt);
 
       // If the shift is exact, then it does demand the low bits (and knows that
@@ -979,15 +988,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     break;
   case ISD::SIGN_EXTEND_INREG: {
     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    unsigned ExVTBits = ExVT.getScalarSizeInBits();
 
-    APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
     // If we only care about the highest bit, don't bother shifting right.
-    if (MsbMask == NewMask) {
-      unsigned ShAmt = ExVT.getScalarSizeInBits();
+    if (NewMask.isSignMask()) {
       SDValue InOp = Op.getOperand(0);
-      unsigned VTBits = Op->getValueType(0).getScalarSizeInBits();
       bool AlreadySignExtended =
-        TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+        TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
       // However if the input is already sign extended we expect the sign
       // extension to be dropped altogether later and do not simplify.
       if (!AlreadySignExtended) {
@@ -997,7 +1004,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
         if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
           ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
 
-        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
+        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
                                                ShiftAmtTy);
         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
                                                  Op.getValueType(), InOp,
@@ -1005,26 +1012,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       }
     }
 
-    // Sign extension.  Compute the demanded bits in the result that are not
-    // present in the input.
-    APInt NewBits =
-      APInt::getHighBitsSet(BitWidth,
-                            BitWidth - ExVT.getScalarSizeInBits());
-
     // If none of the extended bits are demanded, eliminate the sextinreg.
-    if ((NewBits & NewMask) == 0)
+    if (NewMask.getActiveBits() <= ExVTBits)
       return TLO.CombineTo(Op, Op.getOperand(0));
 
-    APInt InSignBit =
-      APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth);
-    APInt InputDemandedBits =
-      APInt::getLowBitsSet(BitWidth,
-                           ExVT.getScalarSizeInBits()) &
-      NewMask;
+    APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
 
     // Since the sign extended bits are demanded, we know that the sign
     // bit is demanded.
-    InputDemandedBits |= InSignBit;
+    InputDemandedBits.setBit(ExVTBits - 1);
 
     if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
                              Known, TLO, Depth+1))
@@ -1035,16 +1031,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // top bits of the result.
 
     // If the input sign bit is known zero, convert this into a zero extension.
-    if (Known.Zero.intersects(InSignBit))
+    if (Known.Zero[ExVTBits - 1])
       return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
                                    Op.getOperand(0), dl, ExVT.getScalarType()));
 
-    if (Known.One.intersects(InSignBit)) {    // Input sign bit known set
-      Known.One |= NewBits;
-      Known.Zero &= ~NewBits;
+    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
+    if (Known.One[ExVTBits - 1]) {    // Input sign bit known set
+      Known.One.setBitsFrom(ExVTBits);
+      Known.Zero &= Mask;
     } else {                       // Input sign bit unknown
-      Known.Zero &= ~NewBits;
-      Known.One &= ~NewBits;
+      Known.Zero &= Mask;
+      Known.One &= Mask;
     }
     break;
   }
@@ -1072,61 +1069,47 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   }
   case ISD::ZERO_EXTEND: {
     unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
-    APInt InMask = NewMask.trunc(OperandBitWidth);
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    APInt NewBits =
-      APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
-    if (!NewBits.intersects(NewMask))
+    if (NewMask.getActiveBits() <= OperandBitWidth)
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                                Op.getValueType(),
                                                Op.getOperand(0)));
 
+    APInt InMask = NewMask.trunc(OperandBitWidth);
     if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
       return true;
     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
     Known = Known.zext(BitWidth);
-    Known.Zero |= NewBits;
+    Known.Zero.setBitsFrom(OperandBitWidth);
     break;
   }
   case ISD::SIGN_EXTEND: {
-    EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getScalarSizeInBits();
-    APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
-    APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1);
-    APInt NewBits   = ~InMask & NewMask;
+    unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();
 
     // If none of the top bits are demanded, convert this into an any_extend.
-    if (NewBits == 0)
+    if (NewMask.getActiveBits() <= InBits)
       return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                               Op.getValueType(),
                                               Op.getOperand(0)));
 
     // Since some of the sign extended bits are demanded, we know that the sign
     // bit is demanded.
-    APInt InDemandedBits = InMask & NewMask;
-    InDemandedBits |= InSignBit;
-    InDemandedBits = InDemandedBits.trunc(InBits);
+    APInt InDemandedBits = NewMask.trunc(InBits);
+    InDemandedBits.setBit(InBits - 1);
 
     if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
                              Depth+1))
       return true;
-    Known = Known.zext(BitWidth);
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    // If the sign bit is known one, the top bits match.
+    Known = Known.sext(BitWidth);
 
     // If the sign bit is known zero, convert this to a zero extend.
-    if (Known.Zero.intersects(InSignBit))
+    if (Known.isNonNegative())
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
                                                Op.getValueType(),
                                                Op.getOperand(0)));
-
-    // If the sign bit is known one, the top bits match.
-    if (Known.One.intersects(InSignBit)) {
-      Known.One |= NewBits;
-      assert((Known.Zero & NewBits) == 0);
-    } else {   // Otherwise, top bits aren't known.
-      assert((Known.One & NewBits) == 0);
-      assert((Known.Zero & NewBits) == 0);
-    }
     break;
   }
   case ISD::ANY_EXTEND: {
@@ -1305,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   Known.resetAll();
 }
 
+void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
+                                                   KnownBits &Known,
+                                                   const APInt &DemandedElts,
+                                                   const SelectionDAG &DAG,
+                                                   unsigned Depth) const {
+  assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
+
+  if (unsigned Align = DAG.InferPtrAlignment(Op)) {
+    // The low bits are known zero if the pointer is aligned.
+    Known.Zero.setLowBits(Log2_32(Align));
+  }
+}
+
 /// This method can be implemented by targets that want to expose additional
 /// information about sign bits to the DAG Combiner.
 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
@@ -2967,7 +2963,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                       SelectionDAG &DAG,
                                       std::vector<SDNode *> *Created) const {
-  AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
     return SDValue(N,0); // Lower SDIV as SDIV
@@ -3436,8 +3432,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
   // The type of data as saved in memory.
   EVT MemSclVT = StVT.getScalarType();
 
-  EVT PtrVT = BasePtr.getValueType();
-
   // Store Stride in bytes
   unsigned Stride = MemSclVT.getSizeInBits() / 8;
   EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
@@ -3450,8 +3444,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                               DAG.getConstant(Idx, SL, IdxVT));
 
-    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
-                              DAG.getConstant(Idx * Stride, SL, PtrVT));
+    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
 
     // This scalar TruncStore may be illegal, but we legalize it later.
     SDValue Store = DAG.getTruncStore(
@@ -3474,6 +3467,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
   EVT VT = LD->getValueType(0);
   EVT LoadedVT = LD->getMemoryVT();
   SDLoc dl(LD);
+  auto &MF = DAG.getMachineFunction();
+
   if (VT.isFloatingPoint() || VT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
     if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
@@ -3498,13 +3493,13 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
     // Copy the value to a (aligned) stack slot using (unaligned) integer
     // loads and stores, then do a (aligned) load from the stack slot.
     MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
-    unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+    unsigned LoadedBytes = LoadedVT.getStoreSize();
     unsigned RegBytes = RegVT.getSizeInBits() / 8;
     unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
 
     // Make sure the stack slot is also aligned for the register type.
     SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
+    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
     SmallVector<SDValue, 8> Stores;
     SDValue StackPtr = StackBase;
     unsigned Offset = 0;
@@ -3523,13 +3518,14 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
           MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
           LD->getAAInfo());
       // Follow the load with a store to the stack slot.  Remember the store.
-      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
-                                    MachinePointerInfo()));
+      Stores.push_back(DAG.getStore(
+          Load.getValue(1), dl, Load, StackPtr,
+          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
       // Increment the pointers.
       Offset += RegBytes;
-      Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
-      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
-                             StackPtrIncrement);
+
+      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
     }
 
     // The last copy may be partial.  Do an extending load.
@@ -3543,15 +3539,17 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
     // Follow the load with a store to the stack slot.  Remember the store.
     // On big-endian machines this requires a truncating store to ensure
     // that the bits end up in the right place.
-    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
-                                       MachinePointerInfo(), MemVT));
+    Stores.push_back(DAG.getTruncStore(
+        Load.getValue(1), dl, Load, StackPtr,
+        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
 
     // The order of the stores doesn't matter - say it with a TokenFactor.
     SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
 
     // Finally, perform the original load only redirected to the stack slot.
     Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
-                          MachinePointerInfo(), LoadedVT);
+                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+                          LoadedVT);
 
     // Callers expect a MERGE_VALUES node.
     return std::make_pair(Load, TF);
@@ -3581,8 +3579,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                         LD->getAAInfo());
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, MinAlign(Alignment, IncrementSize),
@@ -3591,8 +3589,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                         LD->getAAInfo());
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, MinAlign(Alignment, IncrementSize),
@@ -3621,6 +3619,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
   SDValue Val = ST->getValue();
   EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
+  auto &MF = DAG.getMachineFunction();
 
   SDLoc dl(ST);
   if (ST->getMemoryVT().isFloatingPoint() ||
@@ -3649,16 +3648,18 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                       EVT::getIntegerVT(*DAG.getContext(),
                                         StoredVT.getSizeInBits()));
     EVT PtrVT = Ptr.getValueType();
-    unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+    unsigned StoredBytes = StoredVT.getStoreSize();
     unsigned RegBytes = RegVT.getSizeInBits() / 8;
     unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
 
     // Make sure the stack slot is also aligned for the register type.
     SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
 
     // Perform the original store, only redirected to the stack slot.
-    SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
-                                      MachinePointerInfo(), StoredVT);
+    SDValue Store = DAG.getTruncStore(
+        Chain, dl, Val, StackPtr,
+        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
 
     EVT StackPtrVT = StackPtr.getValueType();
 
@@ -3670,8 +3671,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
     // Do all but one copies using the full register width.
     for (unsigned i = 1; i < NumRegs; i++) {
       // Load one integer register's worth from the stack slot.
-      SDValue Load =
-          DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
+      SDValue Load = DAG.getLoad(
+          RegVT, dl, Store, StackPtr,
+          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
       // Store it to the final location.  Remember the store.
       Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                     ST->getPointerInfo().getWithOffset(Offset),
@@ -3679,9 +3681,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                     ST->getMemOperand()->getFlags()));
       // Increment the pointers.
       Offset += RegBytes;
-      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
-                             StackPtr, StackPtrIncrement);
-      Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
     }
 
     // The last store may be partial.  Do a truncating store.  On big-endian
@@ -3691,8 +3692,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                   8 * (StoredBytes - Offset));
 
     // Load from the stack slot.
-    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
-                                  MachinePointerInfo(), MemVT);
+    SDValue Load = DAG.getExtLoad(
+        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
 
     Stores.push_back(
         DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
@@ -3726,9 +3728,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                              Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                              ST->getMemOperand()->getFlags());
 
-  EVT PtrVT = Ptr.getValueType();
-  Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
-                    DAG.getConstant(IncrementSize, dl, PtrVT));
+  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
   Alignment = MinAlign(Alignment, IncrementSize);
   Store2 = DAG.getTruncStore(
       Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
@@ -3767,7 +3767,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                     AddrVT);
     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
   } else
-    Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT);
+    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
 
   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
 }
@@ -3797,7 +3797,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
                                                 SDValue Index) const {
   SDLoc dl(Index);
   // Make sure the index type is big enough to compute in.
-  Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout()));
+  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
 
   EVT EltVT = VecVT.getVectorElementType();
 
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 7b60d22c7ace..25d405bf63de 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -1,4 +1,4 @@
-//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===//
+//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,14 +16,31 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include <cassert>
+#include <cstddef>
+#include <string>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -34,12 +51,11 @@ namespace {
 class ShadowStackGCLowering : public FunctionPass {
   /// RootChain - This is the global linked-list that contains the chain of GC
   /// roots.
-  GlobalVariable *Head;
+  GlobalVariable *Head = nullptr;
 
   /// StackEntryTy - Abstract type of a link in the shadow stack.
-  ///
-  StructType *StackEntryTy;
-  StructType *FrameMapTy;
+  StructType *StackEntryTy = nullptr;
+  StructType *FrameMapTy = nullptr;
 
   /// Roots - GC roots in the current function. Each is a pair of the
   /// intrinsic call and its corresponding alloca.
@@ -47,6 +63,7 @@ class ShadowStackGCLowering : public FunctionPass {
 
 public:
   static char ID;
+
   ShadowStackGCLowering();
 
   bool doInitialization(Module &M) override;
@@ -57,6 +74,7 @@ private:
   Constant *GetFrameMap(Function &F);
   Type *GetConcreteStackEntryType(Function &F);
   void CollectRoots(Function &F);
+
   static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
                                       Type *Ty, Value *BasePtr, int Idx1,
                                       const char *Name);
@@ -64,7 +82,10 @@ private:
                                       Type *Ty, Value *BasePtr, int Idx1, int Idx2,
                                       const char *Name);
 };
-}
+
+} // end anonymous namespace
+
+char ShadowStackGCLowering::ID = 0;
 
 INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
                       "Shadow Stack GC Lowering", false, false)
@@ -74,11 +95,7 @@ INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
 
 FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
 
-char ShadowStackGCLowering::ID = 0;
-
-ShadowStackGCLowering::ShadowStackGCLowering()
-  : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr),
-    FrameMapTy(nullptr) {
+ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) {
   initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
 }
 
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index aa75f5e2caa2..b35bf6ba3a7b 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -1,4 +1,4 @@
-//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===//
+//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -45,50 +45,58 @@
 //
 // If this pass found points matching all these properties, then
 // MachineFrameInfo is updated with this information.
+//
 //===----------------------------------------------------------------------===//
+
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-// To check for profitability.
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-// For property #1 for Save.
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-// To record the result of the analysis.
 #include "llvm/CodeGen/MachineFrameInfo.h"
-// For property #2.
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-// For property #1 for Restore.
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/Passes.h"
-// To know about callee-saved.
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-// To query the target about frame lowering.
-#include "llvm/Target/TargetFrameLowering.h"
-// To know about frame setup operation.
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
-// To access TargetInstrInfo.
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-#define DEBUG_TYPE "shrink-wrap"
+#include <cassert>
+#include <cstdint>
+#include <memory>
 
 using namespace llvm;
 
+#define DEBUG_TYPE "shrink-wrap"
+
 STATISTIC(NumFunc, "Number of functions");
 STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
 STATISTIC(NumCandidatesDropped,
           "Number of shrink-wrapping candidates dropped because of frequency");
 
 static cl::opt<cl::boolOrDefault>
-    EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
-                        cl::desc("enable the shrink-wrapping pass"));
+EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+                    cl::desc("enable the shrink-wrapping pass"));
 
 namespace {
+
 /// \brief Class to determine where the safe point to insert the
 /// prologue and epilogue are.
 /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
@@ -101,31 +109,42 @@ class ShrinkWrap : public MachineFunctionPass {
   RegisterClassInfo RCI;
   MachineDominatorTree *MDT;
   MachinePostDominatorTree *MPDT;
+
   /// Current safe point found for the prologue.
   /// The prologue will be inserted before the first instruction
   /// in this basic block.
   MachineBasicBlock *Save;
+
   /// Current safe point found for the epilogue.
   /// The epilogue will be inserted before the first terminator instruction
   /// in this basic block.
   MachineBasicBlock *Restore;
+
   /// Hold the information of the basic block frequency.
   /// Use to check the profitability of the new points.
   MachineBlockFrequencyInfo *MBFI;
+
   /// Hold the loop information. Used to determine if Save and Restore
   /// are in the same loop.
   MachineLoopInfo *MLI;
+
   /// Frequency of the Entry block.
   uint64_t EntryFreq;
+
   /// Current opcode for frame setup.
   unsigned FrameSetupOpcode;
+
   /// Current opcode for frame destroy.
   unsigned FrameDestroyOpcode;
+
   /// Entry block.
   const MachineBasicBlock *Entry;
-  typedef SmallSetVector<unsigned, 16> SetOfRegs;
+
+  using SetOfRegs = SmallSetVector<unsigned, 16>;
+
   /// Registers that need to be saved for the current function.
   mutable SetOfRegs CurrentCSRs;
+
   /// Current MachineFunction.
   MachineFunction *MachineFunc;
 
@@ -205,9 +224,11 @@ public:
   /// the MachineFrameInfo attached to \p MF with the results.
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
-} // End anonymous namespace.
+
+} // end anonymous namespace
 
 char ShrinkWrap::ID = 0;
+
 char &llvm::ShrinkWrapID = ShrinkWrap::ID;
 
 INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
@@ -219,6 +240,10 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
 
 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
                                  RegScavenger *RS) const {
+  // Ignore DBG_VALUE and other meta instructions that must not affect codegen.
+  if (MI.isMetaInstruction())
+    return false;
+
   if (MI.getOpcode() == FrameSetupOpcode ||
       MI.getOpcode() == FrameDestroyOpcode) {
     DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
@@ -424,7 +449,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF,
 }
 
 bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
     return false;
 
   DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
@@ -537,16 +562,17 @@ bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
   switch (EnableShrinkWrapOpt) {
   case cl::BOU_UNSET:
     return TFI->enableShrinkWrapping(MF) &&
-      // Windows with CFI has some limitations that make it impossible
-      // to use shrink-wrapping.
-      !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
-      // Sanitizers look at the value of the stack at the location
-      // of the crash. Since a crash can happen anywhere, the
-      // frame must be lowered before anything else happen for the
-      // sanitizers to be able to get a correct stack frame.
-      !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
-        MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
-        MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory));
+           // Windows with CFI has some limitations that make it impossible
+           // to use shrink-wrapping.
+           !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+           // Sanitizers look at the value of the stack at the location
+           // of the crash. Since a crash can happen anywhere, the
+           // frame must be lowered before anything else happen for the
+           // sanitizers to be able to get a correct stack frame.
+           !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) ||
+             MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) ||
+             MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) ||
+             MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress));
   // If EnableShrinkWrap is set, it takes precedence on whatever the
   // target sets. The rational is that we assume we want to test
   // something related to shrink-wrapping.
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 3656832a7f1a..ea74c777e1e2 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -12,7 +12,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
@@ -264,7 +263,7 @@ LLVM_DUMP_METHOD void SlotIndexes::dump() const {
   }
 
   for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
-    dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+    dbgs() << "%bb." << i << "\t[" << MBBRanges[i].first << ';'
            << MBBRanges[i].second << ")\n";
 }
 #endif
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 0abe1c47da55..b989b54d4190 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -1,4 +1,4 @@
-//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -28,21 +28,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "SpillPlacement.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "spill-code-placement"
 
 char SpillPlacement::ID = 0;
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
 INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
                       "Spill Code Placement Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
@@ -50,8 +60,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
                     "Spill Code Placement Analysis", true, true)
 
-char &llvm::SpillPlacementID = SpillPlacement::ID;
-
 void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<MachineBlockFrequencyInfo>();
@@ -68,10 +76,10 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
 /// The node Value is positive when the variable should be in a register. The
 /// value can change when linked nodes change, but convergence is very fast
 /// because all weights are positive.
-///
 struct SpillPlacement::Node {
   /// BiasN - Sum of blocks that prefer a spill.
   BlockFrequency BiasN;
+
   /// BiasP - Sum of blocks that prefer a register.
   BlockFrequency BiasP;
 
@@ -80,7 +88,7 @@ struct SpillPlacement::Node {
   /// variable should go in a register through this bundle.
   int Value;
 
-  typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector;
+  using LinkVector = SmallVector<std::pair<BlockFrequency, unsigned>, 4>;
 
   /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
   /// bundles. The weights are all positive block frequencies.
@@ -104,7 +112,7 @@ struct SpillPlacement::Node {
   }
 
   /// clear - Reset per-query data, but preserve frequencies that only depend on
-  // the CFG.
+  /// the CFG.
   void clear(const BlockFrequency &Threshold) {
     BiasN = BiasP = Value = 0;
     SumLinkWeights = Threshold;
@@ -260,14 +268,14 @@ void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
 
     // Live-in to block?
     if (I->Entry != DontCare) {
-      unsigned ib = bundles->getBundle(I->Number, 0);
+      unsigned ib = bundles->getBundle(I->Number, false);
       activate(ib);
       nodes[ib].addBias(Freq, I->Entry);
     }
 
     // Live-out from block?
     if (I->Exit != DontCare) {
-      unsigned ob = bundles->getBundle(I->Number, 1);
+      unsigned ob = bundles->getBundle(I->Number, true);
       activate(ob);
       nodes[ob].addBias(Freq, I->Exit);
     }
@@ -281,8 +289,8 @@ void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
     BlockFrequency Freq = BlockFrequencies[*I];
     if (Strong)
       Freq += Freq;
-    unsigned ib = bundles->getBundle(*I, 0);
-    unsigned ob = bundles->getBundle(*I, 1);
+    unsigned ib = bundles->getBundle(*I, false);
+    unsigned ob = bundles->getBundle(*I, true);
     activate(ib);
     activate(ob);
     nodes[ib].addBias(Freq, PrefSpill);
@@ -294,8 +302,8 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
   for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
        ++I) {
     unsigned Number = *I;
-    unsigned ib = bundles->getBundle(Number, 0);
-    unsigned ob = bundles->getBundle(Number, 1);
+    unsigned ib = bundles->getBundle(Number, false);
+    unsigned ob = bundles->getBundle(Number, true);
 
     // Ignore self-loops.
     if (ib == ob)
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 9b9ecccf9049..aa3ac444e0da 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -1,4 +1,4 @@
-//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -37,9 +37,9 @@ namespace llvm {
 
 class BitVector;
 class EdgeBundles;
-class MachineBasicBlock;
-class MachineLoopInfo;
 class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineLoopInfo;
 
 class SpillPlacement : public MachineFunctionPass {
   struct Node;
@@ -47,7 +47,7 @@ class SpillPlacement : public MachineFunctionPass {
   const EdgeBundles *bundles;
   const MachineLoopInfo *loops;
   const MachineBlockFrequencyInfo *MBFI;
-  Node *nodes;
+  Node *nodes = nullptr;
 
   // Nodes that are active in the current computation. Owned by the prepare()
   // caller.
@@ -73,7 +73,7 @@ class SpillPlacement : public MachineFunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid.
 
-  SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {}
+  SpillPlacement() : MachineFunctionPass(ID) {}
   ~SpillPlacement() override { releaseMemory(); }
 
   /// BorderConstraint - A basic block has separate constraints for entry and
@@ -155,16 +155,16 @@ public:
   }
 
 private:
-  bool runOnMachineFunction(MachineFunction&) override;
-  void getAnalysisUsage(AnalysisUsage&) const override;
+  bool runOnMachineFunction(MachineFunction &mf) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
   void releaseMemory() override;
 
-  void activate(unsigned);
+  void activate(unsigned n);
   void setThreshold(const BlockFrequency &Entry);
 
-  bool update(unsigned);
+  bool update(unsigned n);
 };
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 61ee508c8394..330ee81342b6 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,11 +12,10 @@
 
 namespace llvm {
 
-  class LiveRangeEdit;
-  class MachineFunction;
-  class MachineFunctionPass;
-  class VirtRegMap;
-  class LiveIntervals;
+class LiveRangeEdit;
+class MachineFunction;
+class MachineFunctionPass;
+class VirtRegMap;
 
   /// Spiller interface.
   ///
@@ -24,12 +23,14 @@ namespace llvm {
   /// demand.
   class Spiller {
     virtual void anchor();
+
   public:
     virtual ~Spiller() = 0;
 
     /// spill - Spill the LRE.getParent() live interval.
     virtual void spill(LiveRangeEdit &LRE) = 0;
-    virtual void postOptimization(){};
+
+    virtual void postOptimization() {}
   };
 
   /// Create and return a spiller that will insert spill code directly instead
@@ -37,6 +38,7 @@ namespace llvm {
   Spiller *createInlineSpiller(MachineFunctionPass &pass,
                                MachineFunction &mf,
                                VirtRegMap &vrm);
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SPILLER_H
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 323045fd2aaa..c99c3b09d88a 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -1,4 +1,4 @@
-//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,20 +13,46 @@
 //===----------------------------------------------------------------------===//
 
 #include "SplitKit.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
 
 using namespace llvm;
 
@@ -125,8 +151,7 @@ InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
 SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
                              const MachineLoopInfo &mli)
     : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
-      TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
-      IPA(lis, MF.getNumBlockIDs()) {}
+      TII(*MF.getSubtarget().getInstrInfo()), IPA(lis, MF.getNumBlockIDs()) {}
 
 void SplitAnalysis::clear() {
   UseSlots.clear();
@@ -200,7 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
   // Loop over basic blocks where CurLI is live.
   MachineFunction::iterator MFI =
       LIS.getMBBFromIndex(LVI->start)->getIterator();
-  for (;;) {
+  while (true) {
     BlockInfo BI;
     BI.MBB = &*MFI;
     SlotIndex Start, Stop;
@@ -301,7 +326,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
   MachineFunction::const_iterator MFI =
       LIS.getMBBFromIndex(LVI->start)->getIterator();
   SlotIndex Stop = LIS.getMBBEndIdx(&*MFI);
-  for (;;) {
+  while (true) {
     ++Count;
     LVI = li->advanceTo(LVI, Stop);
     if (LVI == LVE)
@@ -333,7 +358,6 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
   analyzeUses();
 }
 
-
 //===----------------------------------------------------------------------===//
 //                               Split Editor
 //===----------------------------------------------------------------------===//
@@ -347,8 +371,7 @@ SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa,
       MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt),
       TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
       TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
-      MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition),
-      RegAssign(Allocator) {}
+      MBFI(mbfi), RegAssign(Allocator) {}
 
 void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
   Edit = &LRE;
@@ -552,7 +575,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
     if ((SubRegMask & ~LaneMask).any())
       continue;
 
-    unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+    unsigned PopCount = SubRegMask.getNumLanes();
     PossibleIndexes.push_back(Idx);
     if (PopCount > BestCover) {
       BestCover = PopCount;
@@ -572,7 +595,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
   LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
   while (LanesLeft.any()) {
     unsigned BestIdx = 0;
-    int BestCover = INT_MIN;
+    int BestCover = std::numeric_limits<int>::min();
     for (unsigned Idx : PossibleIndexes) {
       LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
       // Early exit if we found a perfect match.
@@ -583,8 +606,8 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
 
       // Try to cover as much of the remaining lanes as possible but
       // as few of the already covered lanes as possible.
-      int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
-                - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+      int Cover = (SubRegMask & LanesLeft).getNumLanes()
+                - (SubRegMask & ~LanesLeft).getNumLanes();
       if (Cover > BestCover) {
         BestCover = Cover;
         BestIdx = Idx;
@@ -706,7 +729,8 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
   assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
   SlotIndex End = LIS.getMBBEndIdx(&MBB);
   SlotIndex Last = End.getPrevSlot();
-  DEBUG(dbgs() << "    enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+  DEBUG(dbgs() << "    enterIntvAtEnd " << printMBBReference(MBB) << ", "
+               << Last);
   VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
   if (!ParentVNI) {
     DEBUG(dbgs() << ": not live\n");
@@ -785,7 +809,8 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
 SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
   assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
   SlotIndex Start = LIS.getMBBStartIdx(&MBB);
-  DEBUG(dbgs() << "    leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+  DEBUG(dbgs() << "    leaveIntvAtTop " << printMBBReference(MBB) << ", "
+               << Start);
 
   VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
   if (!ParentVNI) {
@@ -875,23 +900,23 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
 
   // Best candidate so far.
   MachineBasicBlock *BestMBB = MBB;
-  unsigned BestDepth = UINT_MAX;
+  unsigned BestDepth = std::numeric_limits<unsigned>::max();
 
-  for (;;) {
+  while (true) {
     const MachineLoop *Loop = Loops.getLoopFor(MBB);
 
     // MBB isn't in a loop, it doesn't get any better.  All dominators have a
     // higher frequency by definition.
     if (!Loop) {
-      DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
-                   << MBB->getNumber() << " at depth 0\n");
+      DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
+                   << printMBBReference(*MBB) << " at depth 0\n");
       return MBB;
     }
 
     // We'll never be able to exit the DefLoop.
     if (Loop == DefLoop) {
-      DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
-                   << MBB->getNumber() << " in the same loop\n");
+      DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
+                   << printMBBReference(*MBB) << " in the same loop\n");
       return MBB;
     }
 
@@ -900,8 +925,8 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
     if (Depth < BestDepth) {
       BestMBB = MBB;
       BestDepth = Depth;
-      DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
-                   << MBB->getNumber() << " at depth " << Depth << '\n');
+      DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates "
+                   << printMBBReference(*MBB) << " at depth " << Depth << '\n');
     }
 
     // Leave loop by going to the immediate dominator of the loop header.
@@ -978,7 +1003,7 @@ void SplitEditor::hoistCopies() {
 
   // Track the nearest common dominator for all back-copies for each ParentVNI,
   // indexed by ParentVNI->id.
-  typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+  using DomPair = std::pair<MachineBasicBlock *, SlotIndex>;
   SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
   // The total cost of all the back-copies for each ParentVNI.
   SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
@@ -1040,7 +1065,7 @@ void SplitEditor::hoistCopies() {
 
     DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
                  << " for parent " << ParentVNI->id << '@' << ParentVNI->def
-                 << " hoist to BB#" << Dom.first->getNumber() << ' '
+                 << " hoist to " << printMBBReference(*Dom.first) << ' '
                  << Dom.second << '\n');
   }
 
@@ -1088,7 +1113,6 @@ void SplitEditor::hoistCopies() {
   removeBackCopies(BackCopies);
 }
 
-
 /// transferValues - Transfer all possible values to the new live ranges.
 /// Values that were rematerialized are left alone, they need LRCalc.extend().
 bool SplitEditor::transferValues() {
@@ -1118,7 +1142,7 @@ bool SplitEditor::transferValues() {
 
       // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
       DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx
-                   << '(' << PrintReg(Edit->get(RegIdx)) << ')');
+                   << '(' << printReg(Edit->get(RegIdx)) << ')');
       LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
 
       // Check for a simply defined value that can be blitted directly.
@@ -1151,7 +1175,7 @@ bool SplitEditor::transferValues() {
       if (Start != BlockStart) {
         VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
         assert(VNI && "Missing def for complex mapped value");
-        DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+        DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));
         // MBB has its own def. Is it also live-out?
         if (BlockEnd <= End)
           LRC.setLiveOutValue(&*MBB, VNI);
@@ -1164,7 +1188,7 @@ bool SplitEditor::transferValues() {
       // Handle the live-in blocks covered by [Start;End).
       assert(Start <= BlockStart && "Expected live-in block");
       while (BlockStart < End) {
-        DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+        DEBUG(dbgs() << ">" << printMBBReference(*MBB));
         BlockEnd = LIS.getMBBEndIdx(&*MBB);
         if (BlockStart == ParentVNI->def) {
           // This block has the def of a parent PHI, so it isn't live-in.
@@ -1276,6 +1300,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
   struct ExtPoint {
     ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N)
       : MO(O), RegIdx(R), Next(N) {}
+
     MachineOperand MO;
     unsigned RegIdx;
     SlotIndex Next;
@@ -1306,7 +1331,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
     unsigned RegIdx = RegAssign.lookup(Idx);
     LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
     MO.setReg(LI.reg);
-    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
+    DEBUG(dbgs() << "  rewr " << printMBBReference(*MI->getParent()) << '\t'
                  << Idx << ':' << RegIdx << '\t' << *MI);
 
     // Extend liveness to Idx if the instruction reads reg.
@@ -1352,9 +1377,9 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
         continue;
       // The problem here can be that the new register may have been created
       // for a partially defined original register. For example:
-      //   %vreg827:subreg_hireg<def,read-undef> = ...
+      //   %0:subreg_hireg<def,read-undef> = ...
       //   ...
-      //   %vreg828<def> = COPY %vreg827
+      //   %1 = COPY %0
       if (S.empty())
         continue;
       SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
@@ -1486,7 +1511,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
   assert(!LRMap || LRMap->size() == Edit->size());
 }
 
-
 //===----------------------------------------------------------------------===//
 //                            Single Block Splitting
 //===----------------------------------------------------------------------===//
@@ -1524,7 +1548,6 @@ void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
   }
 }
 
-
 //===----------------------------------------------------------------------===//
 //                    Global Live Range Splitting Support
 //===----------------------------------------------------------------------===//
@@ -1542,9 +1565,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
   SlotIndex Start, Stop;
   std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
 
-  DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
-               << ") intf " << LeaveBefore << '-' << EnterAfter
-               << ", live-through " << IntvIn << " -> " << IntvOut);
+  DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop << ") intf "
+               << LeaveBefore << '-' << EnterAfter << ", live-through "
+               << IntvIn << " -> " << IntvOut);
 
   assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
 
@@ -1639,13 +1662,12 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
   assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
 }
 
-
 void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
                                   unsigned IntvIn, SlotIndex LeaveBefore) {
   SlotIndex Start, Stop;
   std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
-  DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+  DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop
                << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
                << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
                << (BI.LiveOut ? ", stack-out" : ", killed in block"));
@@ -1737,7 +1759,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
   SlotIndex Start, Stop;
   std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
-  DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+  DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop
                << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
                << ", reg-out " << IntvOut << ", enter after " << EnterAfter
                << (BI.LiveIn ? ", stack-in" : ", defined in block"));
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 9d409e924a3d..c0608893d4e5 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,4 +1,4 @@
-//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,26 +17,32 @@
 
 #include "LiveRangeCalc.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/Compiler.h"
+#include <utility>
 
 namespace llvm {
 
-class ConnectedVNInfoEqClasses;
-class LiveInterval;
 class LiveIntervals;
 class LiveRangeEdit;
 class MachineBlockFrequencyInfo;
-class MachineInstr;
+class MachineDominatorTree;
 class MachineLoopInfo;
 class MachineRegisterInfo;
 class TargetInstrInfo;
 class TargetRegisterInfo;
 class VirtRegMap;
-class VNInfo;
-class raw_ostream;
 
 /// Determines the latest safe point in a block in which we can insert a split,
 /// spill or other instruction related with CurLI.
@@ -116,7 +122,7 @@ public:
 
 private:
   // Current live interval.
-  const LiveInterval *CurLI;
+  const LiveInterval *CurLI = nullptr;
 
   /// Insert Point Analysis.
   InsertPointAnalysis IPA;
@@ -200,7 +206,7 @@ public:
   /// analyze(li).
   unsigned countLiveBlocks(const LiveInterval *li) const;
 
-  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+  using BlockPtrSet = SmallPtrSet<const MachineBasicBlock *, 16>;
 
   /// shouldSplitSingleBlock - Returns true if it would help to create a local
   /// live range for the instructions in BI. There is normally no benefit to
@@ -221,7 +227,6 @@ public:
   }
 };
 
-
 /// SplitEditor - Edit machine code and LiveIntervals for live range
 /// splitting.
 ///
@@ -245,7 +250,6 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor {
   const MachineBlockFrequencyInfo &MBFI;
 
 public:
-
   /// ComplementSpillMode - Select how the complement live range should be
   /// created.  SplitEditor automatically creates interval 0 to contain
   /// anything that isn't added to another interval.  This complement interval
@@ -273,19 +277,18 @@ public:
   };
 
 private:
-
   /// Edit - The current parent register and new intervals created.
-  LiveRangeEdit *Edit;
+  LiveRangeEdit *Edit = nullptr;
 
   /// Index into Edit of the currently open interval.
   /// The index 0 is used for the complement, so the first interval started by
   /// openIntv will be 1.
-  unsigned OpenIdx;
+  unsigned OpenIdx = 0;
 
   /// The current spill mode, selected by reset().
-  ComplementSpillMode SpillMode;
+  ComplementSpillMode SpillMode = SM_Partition;
 
-  typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+  using RegAssignMap = IntervalMap<SlotIndex, unsigned>;
 
   /// Allocator for the interval map. This will eventually be shared with
   /// SlotIndexes and LiveIntervals.
@@ -296,8 +299,8 @@ private:
   /// Idx.
   RegAssignMap RegAssign;
 
-  typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
-  typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+  using ValueForcePair = PointerIntPair<VNInfo *, 1>;
+  using ValueMap = DenseMap<std::pair<unsigned, unsigned>, ValueForcePair>;
 
   /// Values - keep track of the mapping from parent values to values in the new
   /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
@@ -419,9 +422,9 @@ private:
 public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.
-  SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, LiveIntervals&,
-              VirtRegMap&, MachineDominatorTree&,
-              MachineBlockFrequencyInfo &);
+  SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, LiveIntervals &lis,
+              VirtRegMap &vrm, MachineDominatorTree &mdt,
+              MachineBlockFrequencyInfo &mbfi);
 
   /// reset - Prepare for a new split.
   void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
@@ -536,6 +539,6 @@ public:
                         unsigned IntvOut, SlotIndex EnterAfter);
 };
 
-}
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_SPLITKIT_H
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index e5fc5402cb41..608845498b48 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1,4 +1,4 @@
-//===-- StackColoring.cpp -------------------------------------------------===//
+//===- StackColoring.cpp --------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -22,35 +22,44 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
 
 using namespace llvm;
 
@@ -366,6 +375,7 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 //
 
 namespace {
+
 /// StackColoring - A machine pass for merging disjoint stack allocations,
 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
 class StackColoring : public MachineFunctionPass {
@@ -378,32 +388,40 @@ class StackColoring : public MachineFunctionPass {
   struct BlockLifetimeInfo {
     /// Which slots BEGINs in each basic block.
     BitVector Begin;
+
     /// Which slots ENDs in each basic block.
     BitVector End;
+
     /// Which slots are marked as LIVE_IN, coming into each basic block.
     BitVector LiveIn;
+
     /// Which slots are marked as LIVE_OUT, coming out of each basic block.
     BitVector LiveOut;
   };
 
   /// Maps active slots (per bit) for each basic block.
-  typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+  using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
   LivenessMap BlockLiveness;
 
   /// Maps serial numbers to basic blocks.
-  DenseMap<const MachineBasicBlock*, int> BasicBlocks;
+  DenseMap<const MachineBasicBlock *, int> BasicBlocks;
+
   /// Maps basic blocks to a serial number.
-  SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+  SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
 
   /// Maps slots to their use interval. Outside of this interval, slots
   /// values are either dead or `undef` and they will not be written to.
   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+
   /// Maps slots to the points where they can become in-use.
   SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
+
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
+
   /// SlotIndex analysis object.
   SlotIndexes *Indexes;
+
   /// The stack protector object.
   StackProtector *SP;
 
@@ -424,13 +442,18 @@ class StackColoring : public MachineFunctionPass {
 
 public:
   static char ID;
+
   StackColoring() : MachineFunctionPass(ID) {
     initializeStackColoringPass(*PassRegistry::getPassRegistry());
   }
+
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnMachineFunction(MachineFunction &MF) override;
 
 private:
+  /// Used in collectMarkers
+  using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
+
   /// Debug.
   void dump() const;
   void dumpIntervals() const;
@@ -489,13 +512,12 @@ private:
   /// Map entries which point to other entries to their destination.
   ///   A->B->C becomes A->C.
   void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
-
-  /// Used in collectMarkers
-  typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
 };
+
 } // end anonymous namespace
 
 char StackColoring::ID = 0;
+
 char &llvm::StackColoringID = StackColoring::ID;
 
 INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
@@ -559,16 +581,13 @@ static inline int getStartOrEndSlot(const MachineInstr &MI)
   return -1;
 }
 
-//
 // At the moment the only way to end a variable lifetime is with
 // a VARIABLE_LIFETIME op (which can't contain a start). If things
 // change and the IR allows for a single inst that both begins
 // and ends lifetime(s), this interface will need to be reworked.
-//
 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
                                          SmallVector<int, 4> &slots,
-                                         bool &isStart)
-{
+                                         bool &isStart) {
   if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
       MI.getOpcode() == TargetOpcode::LIFETIME_END) {
     int Slot = getStartOrEndSlot(MI);
@@ -608,8 +627,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
   return false;
 }
 
-unsigned StackColoring::collectMarkers(unsigned NumSlot)
-{
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
   unsigned MarkersFound = 0;
   BlockBitVecMap SeenStartMap;
   InterestingSlots.clear();
@@ -624,7 +642,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
   // Step 1: collect markers and populate the "InterestingSlots"
   // and "ConservativeSlots" sets.
   for (MachineBasicBlock *MBB : depth_first(MF)) {
-
     // Compute the set of slots for which we've seen a START marker but have
     // not yet seen an END marker at this point in the walk (e.g. on entry
     // to this bb).
@@ -697,7 +714,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
   // NOTE: We use a depth-first iteration to ensure that we obtain a
   // deterministic numbering.
   for (MachineBasicBlock *MBB : depth_first(MF)) {
-
     // Assign a serial number to this basic block.
     BasicBlocks[MBB] = BasicBlockNumbering.size();
     BasicBlockNumbering.push_back(MBB);
@@ -723,7 +739,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
         } else {
           for (auto Slot : slots) {
             DEBUG(dbgs() << "Found a use of slot #" << Slot);
-            DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
+            DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index ");
             DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
             const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
             if (Allocation) {
@@ -745,8 +761,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
   return MarkersFound;
 }
 
-void StackColoring::calculateLocalLiveness()
-{
+void StackColoring::calculateLocalLiveness() {
   unsigned NumIters = 0;
   bool changed = true;
   while (changed) {
@@ -754,7 +769,6 @@ void StackColoring::calculateLocalLiveness()
     ++NumIters;
 
     for (const MachineBasicBlock *BB : BasicBlockNumbering) {
-
       // Use an iterator to avoid repeated lookups.
       LivenessMap::iterator BI = BlockLiveness.find(BB);
       assert(BI != BlockLiveness.end() && "Block not found");
@@ -792,7 +806,7 @@ void StackColoring::calculateLocalLiveness()
         BlockInfo.LiveOut |= LocalLiveOut;
       }
     }
-  }// while changed.
+  } // while changed.
 
   NumIterations = NumIters;
 }
@@ -818,7 +832,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
 
     // Create the interval for the basic blocks containing lifetime begin/end.
     for (const MachineInstr &MI : MBB) {
-
       SmallVector<int, 4> slots;
       bool IsStart = false;
       if (!isLifetimeStartOrEnd(MI, slots, IsStart))
@@ -1047,7 +1060,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
   if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
     for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
       for (WinEHHandlerType &H : TBME.HandlerArray)
-        if (H.CatchObj.FrameIndex != INT_MAX &&
+        if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
             SlotRemap.count(H.CatchObj.FrameIndex))
           H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
 
@@ -1116,8 +1129,7 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
 
 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
   DEBUG(dbgs() << "********** Stack Coloring **********\n"
-               << "********** Function: "
-               << ((const Value*)Func.getFunction())->getName() << '\n');
+               << "********** Function: " << Func.getName() << '\n');
   MF = &Func;
   MFI = &MF->getFrameInfo();
   Indexes = &getAnalysis<SlotIndexes>();
@@ -1157,7 +1169,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
   // Don't continue because there are not enough lifetime markers, or the
   // stack is too small, or we are told not to optimize the slots.
   if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
-      skipFunction(*Func.getFunction())) {
+      skipFunction(Func.getFunction())) {
     DEBUG(dbgs()<<"Will not try to merge slots.\n");
     return removeAllMarkers();
   }
@@ -1231,7 +1243,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
         LiveInterval *Second = &*Intervals[SecondSlot];
         auto &FirstS = LiveStarts[FirstSlot];
         auto &SecondS = LiveStarts[SecondSlot];
-        assert (!First->empty() && !Second->empty() && "Found an empty range");
+        assert(!First->empty() && !Second->empty() && "Found an empty range");
 
         // Merge disjoint slots. This is a little bit tricky - see the
         // Implementation Notes section for an explanation.
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index a5ef7c8229f5..cc9af92c395f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -19,10 +19,10 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index b4fa29d9a86b..e66a25bec911 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -16,6 +16,9 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -27,9 +30,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -41,7 +41,7 @@ using namespace llvm;
 #define DEBUG_TYPE "stackmaps"
 
 static cl::opt<int> StackMapVersion(
-    "stackmap-version", cl::init(3),
+    "stackmap-version", cl::init(3), cl::Hidden,
     cl::desc("Specify the stackmap encoding version (default = 3)"));
 
 const char *StackMaps::WSMP = "Stack Maps: ";
@@ -193,14 +193,14 @@ void StackMaps::print(raw_ostream &OS) {
       case Location::Register:
         OS << "Register ";
         if (TRI)
-          OS << TRI->getName(Loc.Reg);
+          OS << printReg(Loc.Reg, TRI);
         else
           OS << Loc.Reg;
         break;
       case Location::Direct:
         OS << "Direct ";
         if (TRI)
-          OS << TRI->getName(Loc.Reg);
+          OS << printReg(Loc.Reg, TRI);
         else
           OS << Loc.Reg;
         if (Loc.Offset)
@@ -209,7 +209,7 @@ void StackMaps::print(raw_ostream &OS) {
       case Location::Indirect:
         OS << "Indirect ";
         if (TRI)
-          OS << TRI->getName(Loc.Reg);
+          OS << printReg(Loc.Reg, TRI);
         else
           OS << Loc.Reg;
         OS << "+" << Loc.Offset;
@@ -233,7 +233,7 @@ void StackMaps::print(raw_ostream &OS) {
     for (const auto &LO : LiveOuts) {
       OS << WSMP << "\t\tLO " << Idx << ": ";
       if (TRI)
-        OS << TRI->getName(LO.Reg);
+        OS << printReg(LO.Reg, TRI);
       else
         OS << LO.Reg;
       OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte "
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index d8e7840a2576..62cef95a4af2 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -14,14 +14,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
@@ -42,10 +44,8 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <utility>
 
 using namespace llvm;
@@ -247,10 +247,12 @@ bool StackProtector::RequiresStackProtector() {
   OptimizationRemarkEmitter ORE(F);
 
   if (F->hasFnAttribute(Attribute::StackProtectReq)) {
-    ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
              << "Stack protection applied to function "
              << ore::NV("Function", F)
-             << " due to a function attribute or command-line switch");
+             << " due to a function attribute or command-line switch";
+    });
     NeedsProtector = true;
     Strong = true; // Use the same heuristic as strong to determine SSPLayout
   } else if (F->hasFnAttribute(Attribute::StackProtectStrong))
@@ -264,29 +266,31 @@ bool StackProtector::RequiresStackProtector() {
     for (const Instruction &I : BB) {
       if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
         if (AI->isArrayAllocation()) {
-          OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
-                                    &I);
-          Remark
-              << "Stack protection applied to function "
-              << ore::NV("Function", F)
-              << " due to a call to alloca or use of a variable length array";
+          auto RemarkBuilder = [&]() {
+            return OptimizationRemark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
+                                      &I)
+                   << "Stack protection applied to function "
+                   << ore::NV("Function", F)
+                   << " due to a call to alloca or use of a variable length "
+                      "array";
+          };
           if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
             if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
               // A call to alloca with size >= SSPBufferSize requires
               // stack protectors.
               Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
-              ORE.emit(Remark);
+              ORE.emit(RemarkBuilder);
               NeedsProtector = true;
             } else if (Strong) {
               // Require protectors for all alloca calls in strong mode.
               Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
-              ORE.emit(Remark);
+              ORE.emit(RemarkBuilder);
               NeedsProtector = true;
             }
           } else {
             // A call to alloca with a variable size requires protectors.
             Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
-            ORE.emit(Remark);
+            ORE.emit(RemarkBuilder);
             NeedsProtector = true;
           }
           continue;
@@ -296,11 +300,13 @@ bool StackProtector::RequiresStackProtector() {
         if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
           Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
                                                    : SSPLK_SmallArray));
-          ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
+          ORE.emit([&]() {
+            return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
                    << "Stack protection applied to function "
                    << ore::NV("Function", F)
                    << " due to a stack allocated buffer or struct containing a "
-                      "buffer");
+                      "buffer";
+          });
           NeedsProtector = true;
           continue;
         }
@@ -308,11 +314,13 @@ bool StackProtector::RequiresStackProtector() {
         if (Strong && HasAddressTaken(AI)) {
           ++NumAddrTaken;
           Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
-          ORE.emit(
-              OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I)
-              << "Stack protection applied to function "
-              << ore::NV("Function", F)
-              << " due to the address of a local variable being taken");
+          ORE.emit([&]() {
+            return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",
+                                      &I)
+                   << "Stack protection applied to function "
+                   << ore::NV("Function", F)
+                   << " due to the address of a local variable being taken";
+          });
           NeedsProtector = true;
         }
       }
@@ -377,8 +385,12 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
 ///  - The epilogue checks the value stored in the prologue against the original
 ///    value. It calls __stack_chk_fail if they differ.
 bool StackProtector::InsertStackProtectors() {
+  // If the target wants to XOR the frame pointer into the guard value, it's
+  // impossible to emit the check in IR, so the target *must* support stack
+  // protection in SDAG.
   bool SupportsSelectionDAGSP =
-      EnableSelectionDAGSP && !TM->Options.EnableFastISel;
+      TLI->useStackGuardXorFP() ||
+      (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
   AllocaInst *AI = nullptr;       // Place on stack that stores the stack guard.
 
   for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 856bca19dee8..62f662d1ade4 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -1,4 +1,4 @@
-//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,22 +14,34 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 #include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "stack-slot-coloring"
@@ -45,6 +57,7 @@ STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
 STATISTIC(NumDead,       "Number of trivially dead stack accesses eliminated");
 
 namespace {
+
   class StackSlotColoring : public MachineFunctionPass {
     LiveStacks* LS;
     MachineFrameInfo *MFI;
@@ -73,7 +86,7 @@ namespace {
     BitVector AllColors;
 
     // NextColor - Next "color" that's not yet used.
-    int NextColor;
+    int NextColor = -1;
 
     // UsedColors - "Colors" that have been assigned.
     BitVector UsedColors;
@@ -83,10 +96,10 @@ namespace {
 
   public:
     static char ID; // Pass identification
-    StackSlotColoring() :
-      MachineFunctionPass(ID), NextColor(-1) {
-        initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
-      }
+
+    StackSlotColoring() : MachineFunctionPass(ID) {
+      initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+    }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
@@ -111,9 +124,11 @@ namespace {
                             MachineFunction &MF);
     bool RemoveDeadStores(MachineBasicBlock* MBB);
   };
+
 } // end anonymous namespace
 
 char StackSlotColoring::ID = 0;
+
 char &llvm::StackSlotColoringID = StackSlotColoring::ID;
 
 INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
@@ -125,14 +140,16 @@ INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
                 "Stack Slot Coloring", false, false)
 
 namespace {
-  // IntervalSorter - Comparison predicate that sort live intervals by
-  // their weight.
-  struct IntervalSorter {
-    bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
-      return LHS->weight > RHS->weight;
-    }
-  };
-}
+
+// IntervalSorter - Comparison predicate that sort live intervals by
+// their weight.
+struct IntervalSorter {
+  bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+    return LHS->weight > RHS->weight;
+  }
+};
+
+} // end anonymous namespace
 
 /// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
 /// references and update spill slot weights.
@@ -185,8 +202,10 @@ void StackSlotColoring::InitializeSlots() {
   UsedColors.resize(LastFI);
   Assignments.resize(LastFI);
 
-  typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair;
+  using Pair = std::iterator_traits<LiveStacks::iterator>::value_type;
+
   SmallVector<Pair *, 16> Intervals;
+
   Intervals.reserve(LS->getNumIntervals());
   for (auto &I : *LS)
     Intervals.push_back(&I);
@@ -229,10 +248,11 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
 }
 
 /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
-///
 int StackSlotColoring::ColorSlot(LiveInterval *li) {
   int Color = -1;
   bool Share = false;
+  int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+
   if (!DisableSharing) {
     // Check if it's possible to reuse any of the used colors.
     Color = UsedColors.find_first();
@@ -246,6 +266,11 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
     }
   }
 
+  if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) {
+    DEBUG(dbgs() << "cannot share FIs with different stack IDs\n");
+    Share = false;
+  }
+
   // Assign it to the first available color (assumed to be the best) if it's
   // not possible to share a used color with other objects.
   if (!Share) {
@@ -257,7 +282,6 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
 
   // Record the assignment.
   Assignments[Color].push_back(li);
-  int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
   DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
 
   // Change size and alignment of the allocated slot. If there are multiple
@@ -364,7 +388,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
   // The MachineMemOperands have already been updated.
 }
 
-
 /// RemoveDeadStores - Scan through a basic block and look for loads followed
 /// by stores.  If they're both using the same stack slot, then the store is
 /// definitely dead.  This could obviously be much more aggressive (consider
@@ -426,7 +449,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
   return changed;
 }
 
-
 bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
   DEBUG({
       dbgs() << "********** Stack Slot Coloring **********\n"
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 489a607eb176..df1eebf43b2b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -15,6 +15,8 @@
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TailDuplicator.h"
 #include "llvm/Pass.h"
 
@@ -47,12 +49,15 @@ char &llvm::TailDuplicateID = TailDuplicatePass::ID;
 INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false)
 
 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(*MF.getFunction()))
+  if (skipFunction(MF.getFunction()))
     return false;
 
   auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
-  Duplicator.initMF(MF, MBPI, /* LayoutMode */ false);
+  // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile, better
+  // split this into two passes instead.
+  bool PreRegAlloc = MF.getRegInfo().isSSA();
+  Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false);
 
   bool MadeChange = false;
   while (Duplicator.tailDuplicateBlocks())
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index dc7265dcf6c2..f51c884839b3 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -12,13 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/TailDuplicator.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -27,16 +28,15 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
-#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
@@ -75,7 +75,7 @@ static cl::opt<bool>
 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
                                       cl::Hidden);
 
-void TailDuplicator::initMF(MachineFunction &MFin,
+void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
                             const MachineBranchProbabilityInfo *MBPIin,
                             bool LayoutModeIn, unsigned TailDupSizeIn) {
   MF = &MFin;
@@ -89,7 +89,7 @@ void TailDuplicator::initMF(MachineFunction &MFin,
   assert(MBPI != nullptr && "Machine Branch Probability Info required");
 
   LayoutMode = LayoutModeIn;
-  PreRegAlloc = MRI->isSSA();
+  this->PreRegAlloc = PreRegAlloc;
 }
 
 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
@@ -111,9 +111,10 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
           }
         }
         if (!Found) {
-          dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
-          dbgs() << "  missing input from predecessor BB#"
-                 << PredBB->getNumber() << '\n';
+          dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+                 << *MI;
+          dbgs() << "  missing input from predecessor "
+                 << printMBBReference(*PredBB) << '\n';
           llvm_unreachable(nullptr);
         }
       }
@@ -121,15 +122,16 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
       for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
         MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
         if (CheckExtra && !Preds.count(PHIBB)) {
-          dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": "
-                 << *MI;
-          dbgs() << "  extra input from predecessor BB#" << PHIBB->getNumber()
-                 << '\n';
+          dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB)
+                 << ": " << *MI;
+          dbgs() << "  extra input from predecessor "
+                 << printMBBReference(*PHIBB) << '\n';
           llvm_unreachable(nullptr);
         }
         if (PHIBB->getNumber() < 0) {
-          dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
-          dbgs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
+          dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+                 << *MI;
+          dbgs() << "  non-existing " << printMBBReference(*PHIBB) << '\n';
           llvm_unreachable(nullptr);
         }
       }
@@ -369,10 +371,10 @@ void TailDuplicator::duplicateInstruction(
     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
     DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
     const DenseSet<unsigned> &UsedByPhi) {
-  MachineInstr *NewMI = TII->duplicate(*MI, *MF);
+  MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
   if (PreRegAlloc) {
-    for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = NewMI->getOperand(i);
+    for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = NewMI.getOperand(i);
       if (!MO.isReg())
         continue;
       unsigned Reg = MO.getReg();
@@ -443,7 +445,6 @@ void TailDuplicator::duplicateInstruction(
       }
     }
   }
-  PredBB->insert(PredBB->instr_end(), NewMI);
 }
 
 /// After FromBB is tail duplicated into its predecessor blocks, the successors
@@ -549,7 +550,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   unsigned MaxDuplicateCount;
   if (TailDupSize == 0 &&
       TailDuplicateSize.getNumOccurrences() == 0 &&
-      MF->getFunction()->optForSize())
+      MF->getFunction().optForSize())
     MaxDuplicateCount = 1;
   else if (TailDupSize == 0)
     MaxDuplicateCount = TailDuplicateSize;
@@ -784,7 +785,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
                                    MachineBasicBlock *ForcedLayoutPred,
                                    SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                                    SmallVectorImpl<MachineInstr *> &Copies) {
-  DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+  DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
+               << '\n');
 
   DenseSet<unsigned> UsedByPhi;
   getRegsUsedByPHIs(*TailBB, &UsedByPhi);
@@ -825,10 +827,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
     // Clone the contents of TailBB into PredBB.
     DenseMap<unsigned, RegSubRegPair> LocalVRMap;
     SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
-    // Use instr_iterator here to properly handle bundles, e.g.
-    // ARM Thumb2 IT block.
-    MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
-    while (I != TailBB->instr_end()) {
+    for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+         I != E; /* empty */) {
       MachineInstr *MI = &*I;
       ++I;
       if (MI->isPHI()) {
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9dd98b4020d2..b2151eb49655 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -15,16 +15,16 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -32,7 +32,7 @@ TargetFrameLowering::~TargetFrameLowering() = default;
 
 /// The default implementation just looks at attribute "no-frame-pointer-elim".
 bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
-  auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim");
+  auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim");
   return Attr.getValueAsString() == "true";
 }
 
@@ -82,7 +82,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
     return;
 
   // In Naked functions we aren't going to save any registers.
-  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
     return;
 
   // Functions which call __builtin_unwind_init get all their registers saved.
@@ -99,7 +99,7 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
     const MachineFunction &MF) const {
   // When HHVM function is called, the stack is skewed as the return address
   // is removed from the stack before we enter the function.
-  if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM))
+  if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM))
     return MF.getTarget().getPointerSize();
 
   return 0;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 14c5adc0d898..db925f803db6 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -19,6 +19,9 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -26,10 +29,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include <cctype>
 
 using namespace llvm;
@@ -67,6 +67,11 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
   llvm_unreachable("Target didn't implement insertNoop!");
 }
 
+static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
+  return strncmp(Str, MAI.getCommentString().data(),
+                 MAI.getCommentString().size()) == 0;
+}
+
 /// Measure the specified inline asm to determine an approximation of its
 /// length.
 /// Comments (which run till the next SeparatorString or newline) do not
@@ -75,29 +80,46 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
 /// multiple instructions separated by SeparatorString or newlines.
 /// Variable-length instructions are not handled here; this function
 /// may be overloaded in the target code to do that.
+/// We implement a special case of the .space directive which takes only a
+/// single integer argument in base 10 that is the size in bytes. This is a
+/// restricted form of the GAS directive in that we only interpret
+/// simple--i.e. not a logical or arithmetic expression--size values without
+/// the optional fill value. This is primarily used for creating arbitrary
+/// sized inline asm blocks for testing purposes.
 unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
                                              const MCAsmInfo &MAI) const {
   // Count the number of instructions in the asm.
-  bool atInsnStart = true;
-  unsigned InstCount = 0;
+  bool AtInsnStart = true;
+  unsigned Length = 0;
   for (; *Str; ++Str) {
     if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
                                 strlen(MAI.getSeparatorString())) == 0) {
-      atInsnStart = true;
-    } else if (strncmp(Str, MAI.getCommentString().data(),
-                       MAI.getCommentString().size()) == 0) {
+      AtInsnStart = true;
+    } else if (isAsmComment(Str, MAI)) {
       // Stop counting as an instruction after a comment until the next
       // separator.
-      atInsnStart = false;
+      AtInsnStart = false;
     }
 
-    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
-      ++InstCount;
-      atInsnStart = false;
+    if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+      unsigned AddLength = MAI.getMaxInstLength();
+      if (strncmp(Str, ".space", 6) == 0) {
+        char *EStr;
+        int SpaceSize;
+        SpaceSize = strtol(Str + 6, &EStr, 10);
+        SpaceSize = SpaceSize < 0 ? 0 : SpaceSize;
+        while (*EStr != '\n' && std::isspace(static_cast<unsigned char>(*EStr)))
+          ++EStr;
+        if (*EStr == '\0' || *EStr == '\n' ||
+            isAsmComment(EStr, MAI)) // Successfully parsed .space argument
+          AddLength = SpaceSize;
+      }
+      Length += AddLength;
+      AtInsnStart = false;
     }
   }
 
-  return InstCount * MAI.getMaxInstLength();
+  return Length;
 }
 
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
@@ -169,7 +191,7 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
   MachineInstr *CommutedMI = nullptr;
   if (NewMI) {
     // Create a new instruction.
-    MachineFunction &MF = *MI.getParent()->getParent();
+    MachineFunction &MF = *MI.getMF();
     CommutedMI = MF.CloneMachineInstr(&MI);
   } else {
     CommutedMI = &MI;
@@ -388,10 +410,11 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
   return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 }
 
-MachineInstr *TargetInstrInfo::duplicate(MachineInstr &Orig,
-                                         MachineFunction &MF) const {
+MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
   assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
-  return MF.CloneMachineInstr(&Orig);
+  MachineFunction &MF = *MBB.getParent();
+  return MF.CloneMachineInstrBundle(MBB, InsertBefore, Orig);
 }
 
 // If the COPY instruction in MI can be folded to a stack operation, return
@@ -415,7 +438,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
   assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
          "Cannot fold physregs");
 
-  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
   const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
 
   if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
@@ -495,21 +518,13 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
   return NewMI;
 }
 
-/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
-/// slot into the specified machine instruction for the specified operand(s).
-/// If this is possible, a new instruction is returned with the specified
-/// operand folded, otherwise NULL is returned. The client is responsible for
-/// removing the old instruction and adding the new one in the instruction
-/// stream.
 MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
                                                  ArrayRef<unsigned> Ops, int FI,
                                                  LiveIntervals *LIS) const {
   auto Flags = MachineMemOperand::MONone;
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-    if (MI.getOperand(Ops[i]).isDef())
-      Flags |= MachineMemOperand::MOStore;
-    else
-      Flags |= MachineMemOperand::MOLoad;
+  for (unsigned OpIdx : Ops)
+    Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
+                                          : MachineMemOperand::MOLoad;
 
   MachineBasicBlock *MBB = MI.getParent();
   assert(MBB && "foldMemoryOperand needs an inserted instruction");
@@ -525,10 +540,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
   if (Flags & MachineMemOperand::MOStore) {
     MemSize = MFI.getObjectSize(FI);
   } else {
-    for (unsigned Idx : Ops) {
+    for (unsigned OpIdx : Ops) {
       int64_t OpSize = MFI.getObjectSize(FI);
 
-      if (auto SubReg = MI.getOperand(Idx).getSubReg()) {
+      if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) {
         unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
         if (SubRegSize > 0 && !(SubRegSize % 8))
           OpSize = SubRegSize / 8;
@@ -590,6 +605,54 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
   return &*--Pos;
 }
 
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+                                                 ArrayRef<unsigned> Ops,
+                                                 MachineInstr &LoadMI,
+                                                 LiveIntervals *LIS) const {
+  assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+  for (unsigned OpIdx : Ops)
+    assert(MI.getOperand(OpIdx).isUse() && "Folding load into def!");
+#endif
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+
+  // Ask the target to do the actual folding.
+  MachineInstr *NewMI = nullptr;
+  int FrameIndex = 0;
+
+  if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
+       MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+       MI.getOpcode() == TargetOpcode::STATEPOINT) &&
+      isLoadFromStackSlot(LoadMI, FrameIndex)) {
+    // Fold stackmap/patchpoint.
+    NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+    if (NewMI)
+      NewMI = &*MBB.insert(MI, NewMI);
+  } else {
+    // Ask the target to do the actual folding.
+    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
+  }
+
+  if (!NewMI)
+    return nullptr;
+
+  // Copy the memoperands from the load to the folded instruction.
+  if (MI.memoperands_empty()) {
+    NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
+  } else {
+    // Handle the rare case of folding multiple loads.
+    NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+    for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
+                                    E = LoadMI.memoperands_end();
+         I != E; ++I) {
+      NewMI->addMemOperand(MF, *I);
+    }
+  }
+  return NewMI;
+}
+
 bool TargetInstrInfo::hasReassociableOperands(
     const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
   const MachineOperand &Op1 = Inst.getOperand(1);
@@ -685,11 +748,13 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
 
   return false;
 }
+
 /// Return true when a code sequence can improve loop throughput.
 bool
 TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
   return false;
 }
+
 /// Attempt the reassociation transformation to reduce critical path length.
 /// See the above comments before getMachineCombinerPatterns().
 void TargetInstrInfo::reassociateOps(
@@ -698,7 +763,7 @@ void TargetInstrInfo::reassociateOps(
     SmallVectorImpl<MachineInstr *> &InsInstrs,
     SmallVectorImpl<MachineInstr *> &DelInstrs,
     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
-  MachineFunction *MF = Root.getParent()->getParent();
+  MachineFunction *MF = Root.getMF();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -781,7 +846,7 @@ void TargetInstrInfo::genAlternativeCodeSequence(
     SmallVectorImpl<MachineInstr *> &InsInstrs,
     SmallVectorImpl<MachineInstr *> &DelInstrs,
     DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
-  MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+  MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
 
   // Select the previous instruction in the sequence based on the input pattern.
   MachineInstr *Prev = nullptr;
@@ -803,59 +868,9 @@ void TargetInstrInfo::genAlternativeCodeSequence(
   reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
 }
 
-/// foldMemoryOperand - Same as the previous version except it allows folding
-/// of any load and store from / to any address, not just from a specific
-/// stack slot.
-MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
-                                                 ArrayRef<unsigned> Ops,
-                                                 MachineInstr &LoadMI,
-                                                 LiveIntervals *LIS) const {
-  assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
-#ifndef NDEBUG
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-    assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!");
-#endif
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-
-  // Ask the target to do the actual folding.
-  MachineInstr *NewMI = nullptr;
-  int FrameIndex = 0;
-
-  if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
-       MI.getOpcode() == TargetOpcode::PATCHPOINT ||
-       MI.getOpcode() == TargetOpcode::STATEPOINT) &&
-      isLoadFromStackSlot(LoadMI, FrameIndex)) {
-    // Fold stackmap/patchpoint.
-    NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
-    if (NewMI)
-      NewMI = &*MBB.insert(MI, NewMI);
-  } else {
-    // Ask the target to do the actual folding.
-    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
-  }
-
-  if (!NewMI) return nullptr;
-
-  // Copy the memoperands from the load to the folded instruction.
-  if (MI.memoperands_empty()) {
-    NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
-  }
-  else {
-    // Handle the rare case of folding multiple loads.
-    NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-    for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
-                                    E = LoadMI.memoperands_end();
-         I != E; ++I) {
-      NewMI->addMemOperand(MF, *I);
-    }
-  }
-  return NewMI;
-}
-
 bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
     const MachineInstr &MI, AliasAnalysis *AA) const {
-  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineFunction &MF = *MI.getMF();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Remat clients assume operand 0 is the defined register.
@@ -933,7 +948,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
 }
 
 int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
-  const MachineFunction *MF = MI.getParent()->getParent();
+  const MachineFunction *MF = MI.getMF();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   bool StackGrowsDown =
     TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 3914ee514712..543c12eebb45 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1,4 +1,4 @@
-//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,32 +13,55 @@
 
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <cctype>
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+
 using namespace llvm;
 
 static cl::opt<bool> JumpIsExpensiveOverride(
@@ -78,186 +101,13 @@ static cl::opt<int> MinPercentageForPredictableBranch(
     cl::Hidden);
 
 /// InitLibcallNames - Set default libcall names.
-///
 static void InitLibcallNames(const char **Names, const Triple &TT) {
-  Names[RTLIB::SHL_I16] = "__ashlhi3";
-  Names[RTLIB::SHL_I32] = "__ashlsi3";
-  Names[RTLIB::SHL_I64] = "__ashldi3";
-  Names[RTLIB::SHL_I128] = "__ashlti3";
-  Names[RTLIB::SRL_I16] = "__lshrhi3";
-  Names[RTLIB::SRL_I32] = "__lshrsi3";
-  Names[RTLIB::SRL_I64] = "__lshrdi3";
-  Names[RTLIB::SRL_I128] = "__lshrti3";
-  Names[RTLIB::SRA_I16] = "__ashrhi3";
-  Names[RTLIB::SRA_I32] = "__ashrsi3";
-  Names[RTLIB::SRA_I64] = "__ashrdi3";
-  Names[RTLIB::SRA_I128] = "__ashrti3";
-  Names[RTLIB::MUL_I8] = "__mulqi3";
-  Names[RTLIB::MUL_I16] = "__mulhi3";
-  Names[RTLIB::MUL_I32] = "__mulsi3";
-  Names[RTLIB::MUL_I64] = "__muldi3";
-  Names[RTLIB::MUL_I128] = "__multi3";
-  Names[RTLIB::MULO_I32] = "__mulosi4";
-  Names[RTLIB::MULO_I64] = "__mulodi4";
-  Names[RTLIB::MULO_I128] = "__muloti4";
-  Names[RTLIB::SDIV_I8] = "__divqi3";
-  Names[RTLIB::SDIV_I16] = "__divhi3";
-  Names[RTLIB::SDIV_I32] = "__divsi3";
-  Names[RTLIB::SDIV_I64] = "__divdi3";
-  Names[RTLIB::SDIV_I128] = "__divti3";
-  Names[RTLIB::UDIV_I8] = "__udivqi3";
-  Names[RTLIB::UDIV_I16] = "__udivhi3";
-  Names[RTLIB::UDIV_I32] = "__udivsi3";
-  Names[RTLIB::UDIV_I64] = "__udivdi3";
-  Names[RTLIB::UDIV_I128] = "__udivti3";
-  Names[RTLIB::SREM_I8] = "__modqi3";
-  Names[RTLIB::SREM_I16] = "__modhi3";
-  Names[RTLIB::SREM_I32] = "__modsi3";
-  Names[RTLIB::SREM_I64] = "__moddi3";
-  Names[RTLIB::SREM_I128] = "__modti3";
-  Names[RTLIB::UREM_I8] = "__umodqi3";
-  Names[RTLIB::UREM_I16] = "__umodhi3";
-  Names[RTLIB::UREM_I32] = "__umodsi3";
-  Names[RTLIB::UREM_I64] = "__umoddi3";
-  Names[RTLIB::UREM_I128] = "__umodti3";
-
-  Names[RTLIB::NEG_I32] = "__negsi2";
-  Names[RTLIB::NEG_I64] = "__negdi2";
-  Names[RTLIB::ADD_F32] = "__addsf3";
-  Names[RTLIB::ADD_F64] = "__adddf3";
-  Names[RTLIB::ADD_F80] = "__addxf3";
-  Names[RTLIB::ADD_F128] = "__addtf3";
-  Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
-  Names[RTLIB::SUB_F32] = "__subsf3";
-  Names[RTLIB::SUB_F64] = "__subdf3";
-  Names[RTLIB::SUB_F80] = "__subxf3";
-  Names[RTLIB::SUB_F128] = "__subtf3";
-  Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
-  Names[RTLIB::MUL_F32] = "__mulsf3";
-  Names[RTLIB::MUL_F64] = "__muldf3";
-  Names[RTLIB::MUL_F80] = "__mulxf3";
-  Names[RTLIB::MUL_F128] = "__multf3";
-  Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
-  Names[RTLIB::DIV_F32] = "__divsf3";
-  Names[RTLIB::DIV_F64] = "__divdf3";
-  Names[RTLIB::DIV_F80] = "__divxf3";
-  Names[RTLIB::DIV_F128] = "__divtf3";
-  Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
-  Names[RTLIB::REM_F32] = "fmodf";
-  Names[RTLIB::REM_F64] = "fmod";
-  Names[RTLIB::REM_F80] = "fmodl";
-  Names[RTLIB::REM_F128] = "fmodl";
-  Names[RTLIB::REM_PPCF128] = "fmodl";
-  Names[RTLIB::FMA_F32] = "fmaf";
-  Names[RTLIB::FMA_F64] = "fma";
-  Names[RTLIB::FMA_F80] = "fmal";
-  Names[RTLIB::FMA_F128] = "fmal";
-  Names[RTLIB::FMA_PPCF128] = "fmal";
-  Names[RTLIB::POWI_F32] = "__powisf2";
-  Names[RTLIB::POWI_F64] = "__powidf2";
-  Names[RTLIB::POWI_F80] = "__powixf2";
-  Names[RTLIB::POWI_F128] = "__powitf2";
-  Names[RTLIB::POWI_PPCF128] = "__powitf2";
-  Names[RTLIB::SQRT_F32] = "sqrtf";
-  Names[RTLIB::SQRT_F64] = "sqrt";
-  Names[RTLIB::SQRT_F80] = "sqrtl";
-  Names[RTLIB::SQRT_F128] = "sqrtl";
-  Names[RTLIB::SQRT_PPCF128] = "sqrtl";
-  Names[RTLIB::LOG_F32] = "logf";
-  Names[RTLIB::LOG_F64] = "log";
-  Names[RTLIB::LOG_F80] = "logl";
-  Names[RTLIB::LOG_F128] = "logl";
-  Names[RTLIB::LOG_PPCF128] = "logl";
-  Names[RTLIB::LOG2_F32] = "log2f";
-  Names[RTLIB::LOG2_F64] = "log2";
-  Names[RTLIB::LOG2_F80] = "log2l";
-  Names[RTLIB::LOG2_F128] = "log2l";
-  Names[RTLIB::LOG2_PPCF128] = "log2l";
-  Names[RTLIB::LOG10_F32] = "log10f";
-  Names[RTLIB::LOG10_F64] = "log10";
-  Names[RTLIB::LOG10_F80] = "log10l";
-  Names[RTLIB::LOG10_F128] = "log10l";
-  Names[RTLIB::LOG10_PPCF128] = "log10l";
-  Names[RTLIB::EXP_F32] = "expf";
-  Names[RTLIB::EXP_F64] = "exp";
-  Names[RTLIB::EXP_F80] = "expl";
-  Names[RTLIB::EXP_F128] = "expl";
-  Names[RTLIB::EXP_PPCF128] = "expl";
-  Names[RTLIB::EXP2_F32] = "exp2f";
-  Names[RTLIB::EXP2_F64] = "exp2";
-  Names[RTLIB::EXP2_F80] = "exp2l";
-  Names[RTLIB::EXP2_F128] = "exp2l";
-  Names[RTLIB::EXP2_PPCF128] = "exp2l";
-  Names[RTLIB::SIN_F32] = "sinf";
-  Names[RTLIB::SIN_F64] = "sin";
-  Names[RTLIB::SIN_F80] = "sinl";
-  Names[RTLIB::SIN_F128] = "sinl";
-  Names[RTLIB::SIN_PPCF128] = "sinl";
-  Names[RTLIB::COS_F32] = "cosf";
-  Names[RTLIB::COS_F64] = "cos";
-  Names[RTLIB::COS_F80] = "cosl";
-  Names[RTLIB::COS_F128] = "cosl";
-  Names[RTLIB::COS_PPCF128] = "cosl";
-  Names[RTLIB::POW_F32] = "powf";
-  Names[RTLIB::POW_F64] = "pow";
-  Names[RTLIB::POW_F80] = "powl";
-  Names[RTLIB::POW_F128] = "powl";
-  Names[RTLIB::POW_PPCF128] = "powl";
-  Names[RTLIB::CEIL_F32] = "ceilf";
-  Names[RTLIB::CEIL_F64] = "ceil";
-  Names[RTLIB::CEIL_F80] = "ceill";
-  Names[RTLIB::CEIL_F128] = "ceill";
-  Names[RTLIB::CEIL_PPCF128] = "ceill";
-  Names[RTLIB::TRUNC_F32] = "truncf";
-  Names[RTLIB::TRUNC_F64] = "trunc";
-  Names[RTLIB::TRUNC_F80] = "truncl";
-  Names[RTLIB::TRUNC_F128] = "truncl";
-  Names[RTLIB::TRUNC_PPCF128] = "truncl";
-  Names[RTLIB::RINT_F32] = "rintf";
-  Names[RTLIB::RINT_F64] = "rint";
-  Names[RTLIB::RINT_F80] = "rintl";
-  Names[RTLIB::RINT_F128] = "rintl";
-  Names[RTLIB::RINT_PPCF128] = "rintl";
-  Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
-  Names[RTLIB::NEARBYINT_F64] = "nearbyint";
-  Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
-  Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
-  Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
-  Names[RTLIB::ROUND_F32] = "roundf";
-  Names[RTLIB::ROUND_F64] = "round";
-  Names[RTLIB::ROUND_F80] = "roundl";
-  Names[RTLIB::ROUND_F128] = "roundl";
-  Names[RTLIB::ROUND_PPCF128] = "roundl";
-  Names[RTLIB::FLOOR_F32] = "floorf";
-  Names[RTLIB::FLOOR_F64] = "floor";
-  Names[RTLIB::FLOOR_F80] = "floorl";
-  Names[RTLIB::FLOOR_F128] = "floorl";
-  Names[RTLIB::FLOOR_PPCF128] = "floorl";
-  Names[RTLIB::FMIN_F32] = "fminf";
-  Names[RTLIB::FMIN_F64] = "fmin";
-  Names[RTLIB::FMIN_F80] = "fminl";
-  Names[RTLIB::FMIN_F128] = "fminl";
-  Names[RTLIB::FMIN_PPCF128] = "fminl";
-  Names[RTLIB::FMAX_F32] = "fmaxf";
-  Names[RTLIB::FMAX_F64] = "fmax";
-  Names[RTLIB::FMAX_F80] = "fmaxl";
-  Names[RTLIB::FMAX_F128] = "fmaxl";
-  Names[RTLIB::FMAX_PPCF128] = "fmaxl";
-  Names[RTLIB::ROUND_F32] = "roundf";
-  Names[RTLIB::ROUND_F64] = "round";
-  Names[RTLIB::ROUND_F80] = "roundl";
-  Names[RTLIB::ROUND_F128] = "roundl";
-  Names[RTLIB::ROUND_PPCF128] = "roundl";
-  Names[RTLIB::COPYSIGN_F32] = "copysignf";
-  Names[RTLIB::COPYSIGN_F64] = "copysign";
-  Names[RTLIB::COPYSIGN_F80] = "copysignl";
-  Names[RTLIB::COPYSIGN_F128] = "copysignl";
-  Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
-  Names[RTLIB::FPEXT_F32_PPCF128] = "__gcc_stoq";
-  Names[RTLIB::FPEXT_F64_PPCF128] = "__gcc_dtoq";
-  Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
-  Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
-  Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+#define HANDLE_LIBCALL(code, name) \
+  Names[RTLIB::code] = name;
+#include "llvm/CodeGen/RuntimeLibcalls.def"
+#undef HANDLE_LIBCALL
+
+  // A few names are different on particular architectures or environments.
   if (TT.isOSDarwin()) {
     // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
     // of the gnueabi-style __gnu_*_ieee.
@@ -268,264 +118,8 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
     Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
     Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
   }
-  Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2";
-  Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2";
-  Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2";
-  Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2";
-  Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
-  Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
-  Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
-  Names[RTLIB::FPROUND_PPCF128_F32] = "__gcc_qtos";
-  Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
-  Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
-  Names[RTLIB::FPROUND_PPCF128_F64] = "__gcc_qtod";
-  Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
-  Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
-  Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
-  Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
-  Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
-  Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
-  Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
-  Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
-  Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
-  Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
-  Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
-  Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
-  Names[RTLIB::FPTOSINT_PPCF128_I32] = "__gcc_qtou";
-  Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
-  Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
-  Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
-  Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
-  Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
-  Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
-  Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
-  Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
-  Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
-  Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
-  Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
-  Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
-  Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
-  Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
-  Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
-  Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
-  Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
-  Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
-  Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
-  Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
-  Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
-  Names[RTLIB::SINTTOFP_I32_PPCF128] = "__gcc_itoq";
-  Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
-  Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
-  Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
-  Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
-  Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
-  Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
-  Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
-  Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
-  Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
-  Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
-  Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
-  Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
-  Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
-  Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
-  Names[RTLIB::UINTTOFP_I32_PPCF128] = "__gcc_utoq";
-  Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
-  Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
-  Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
-  Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
-  Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
-  Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
-  Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
-  Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
-  Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
-  Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
-  Names[RTLIB::OEQ_F32] = "__eqsf2";
-  Names[RTLIB::OEQ_F64] = "__eqdf2";
-  Names[RTLIB::OEQ_F128] = "__eqtf2";
-  Names[RTLIB::OEQ_PPCF128] = "__gcc_qeq";
-  Names[RTLIB::UNE_F32] = "__nesf2";
-  Names[RTLIB::UNE_F64] = "__nedf2";
-  Names[RTLIB::UNE_F128] = "__netf2";
-  Names[RTLIB::UNE_PPCF128] = "__gcc_qne";
-  Names[RTLIB::OGE_F32] = "__gesf2";
-  Names[RTLIB::OGE_F64] = "__gedf2";
-  Names[RTLIB::OGE_F128] = "__getf2";
-  Names[RTLIB::OGE_PPCF128] = "__gcc_qge";
-  Names[RTLIB::OLT_F32] = "__ltsf2";
-  Names[RTLIB::OLT_F64] = "__ltdf2";
-  Names[RTLIB::OLT_F128] = "__lttf2";
-  Names[RTLIB::OLT_PPCF128] = "__gcc_qlt";
-  Names[RTLIB::OLE_F32] = "__lesf2";
-  Names[RTLIB::OLE_F64] = "__ledf2";
-  Names[RTLIB::OLE_F128] = "__letf2";
-  Names[RTLIB::OLE_PPCF128] = "__gcc_qle";
-  Names[RTLIB::OGT_F32] = "__gtsf2";
-  Names[RTLIB::OGT_F64] = "__gtdf2";
-  Names[RTLIB::OGT_F128] = "__gttf2";
-  Names[RTLIB::OGT_PPCF128] = "__gcc_qgt";
-  Names[RTLIB::UO_F32] = "__unordsf2";
-  Names[RTLIB::UO_F64] = "__unorddf2";
-  Names[RTLIB::UO_F128] = "__unordtf2";
-  Names[RTLIB::UO_PPCF128] = "__gcc_qunord";
-  Names[RTLIB::O_F32] = "__unordsf2";
-  Names[RTLIB::O_F64] = "__unorddf2";
-  Names[RTLIB::O_F128] = "__unordtf2";
-  Names[RTLIB::O_PPCF128] = "__gcc_qunord";
-  Names[RTLIB::MEMCPY] = "memcpy";
-  Names[RTLIB::MEMMOVE] = "memmove";
-  Names[RTLIB::MEMSET] = "memset";
-  Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] =
-      "__llvm_memcpy_element_unordered_atomic_1";
-  Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] =
-      "__llvm_memcpy_element_unordered_atomic_2";
-  Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] =
-      "__llvm_memcpy_element_unordered_atomic_4";
-  Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] =
-      "__llvm_memcpy_element_unordered_atomic_8";
-  Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
-      "__llvm_memcpy_element_unordered_atomic_16";
-  Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] =
-      "__llvm_memmove_element_unordered_atomic_1";
-  Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] =
-      "__llvm_memmove_element_unordered_atomic_2";
-  Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] =
-      "__llvm_memmove_element_unordered_atomic_4";
-  Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] =
-      "__llvm_memmove_element_unordered_atomic_8";
-  Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
-      "__llvm_memmove_element_unordered_atomic_16";
-  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
-      "__llvm_memset_element_unordered_atomic_1";
-  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
-      "__llvm_memset_element_unordered_atomic_2";
-  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
-      "__llvm_memset_element_unordered_atomic_4";
-  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
-      "__llvm_memset_element_unordered_atomic_8";
-  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
-      "__llvm_memset_element_unordered_atomic_16";
-  Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
-  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
-  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
-  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
-  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
-  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16";
-  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
-  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
-  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
-  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
-  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16";
-  Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
-  Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
-  Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
-  Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
-  Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16";
-  Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
-  Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
-  Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
-  Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
-  Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16";
-  Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
-  Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
-  Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
-  Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
-  Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16";
-  Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
-  Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
-  Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
-  Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
-  Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16";
-  Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
-  Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
-  Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
-  Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
-  Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16";
-  Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
-  Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
-  Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
-  Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
-  Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16";
-  Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1";
-  Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2";
-  Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4";
-  Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8";
-  Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16";
-  Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1";
-  Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2";
-  Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4";
-  Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8";
-  Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16";
-  Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1";
-  Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2";
-  Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4";
-  Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8";
-  Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16";
-  Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1";
-  Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2";
-  Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
-  Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
-  Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
-
-  Names[RTLIB::ATOMIC_LOAD] = "__atomic_load";
-  Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1";
-  Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2";
-  Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4";
-  Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8";
-  Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16";
-
-  Names[RTLIB::ATOMIC_STORE] = "__atomic_store";
-  Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1";
-  Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2";
-  Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4";
-  Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8";
-  Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16";
-
-  Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange";
-  Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1";
-  Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2";
-  Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4";
-  Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8";
-  Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16";
-
-  Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange";
-  Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1";
-  Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2";
-  Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4";
-  Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8";
-  Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16";
-
-  Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1";
-  Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2";
-  Names[RTLIB::ATOMIC_FETCH_ADD_4] = "__atomic_fetch_add_4";
-  Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8";
-  Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16";
-  Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1";
-  Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2";
-  Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4";
-  Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8";
-  Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16";
-  Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1";
-  Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2";
-  Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4";
-  Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8";
-  Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16";
-  Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1";
-  Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2";
-  Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4";
-  Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8";
-  Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16";
-  Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1";
-  Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2";
-  Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4";
-  Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8";
-  Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16";
-  Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1";
-  Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2";
-  Names[RTLIB::ATOMIC_FETCH_NAND_4] = "__atomic_fetch_nand_4";
-  Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8";
-  Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16";
-
-  if (TT.isGNUEnvironment()) {
+
+  if (TT.isGNUEnvironment() || TT.isOSFuchsia()) {
     Names[RTLIB::SINCOS_F32] = "sincosf";
     Names[RTLIB::SINCOS_F64] = "sincos";
     Names[RTLIB::SINCOS_F80] = "sincosl";
@@ -533,11 +127,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
     Names[RTLIB::SINCOS_PPCF128] = "sincosl";
   }
 
-  if (!TT.isOSOpenBSD()) {
-    Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
+  if (TT.isOSOpenBSD()) {
+    Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr;
   }
-
-  Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize";
 }
 
 /// Set default libcall CallingConvs.
@@ -858,7 +450,6 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
 }
 
 /// InitCmpLibcallCCs - Set default comparison libcall CC.
-///
 static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
   memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
   CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
@@ -929,6 +520,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
   MaxAtomicSizeInBitsSupported = 1024;
 
   MinCmpXchgSizeInBits = 0;
+  SupportsUnalignedAtomics = false;
 
   std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);
 
@@ -1044,7 +636,6 @@ void TargetLoweringBase::initActions() {
 
   // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
   // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
-  //
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
 }
 
@@ -1156,7 +747,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
     // found, fallback to the usual mechanism of widening/splitting the
     // vector.
     EVT OldEltVT = EltVT;
-    while (1) {
+    while (true) {
       // Increase the bitwidth of the element to the next pow-of-two
       // (which is greater than 8 bits).
       EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
@@ -1184,7 +775,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
 
   // Try to widen the vector until a legal type is found.
   // If there is no wider legal type, split the vector.
-  while (1) {
+  while (true) {
     // Round up to the next power of 2.
     NumElts = (unsigned)NextPowerOf2(NumElts);
 
@@ -1276,7 +867,7 @@ MachineBasicBlock *
 TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
                                    MachineBasicBlock *MBB) const {
   MachineInstr *MI = &InitialMI;
-  MachineFunction &MF = *MI->getParent()->getParent();
+  MachineFunction &MF = *MI->getMF();
   MachineFrameInfo &MFI = MF.getFrameInfo();
 
   // We're handling multiple types of operands here:
@@ -1495,7 +1086,7 @@ void TargetLoweringBase::computeRegisterProperties(
     bool IsLegalWiderType = false;
     LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
     switch (PreferredAction) {
-    case TypePromoteInteger: {
+    case TypePromoteInteger:
       // Try to promote the elements of integer vectors. If no legal
       // promotion was found, fall through to the widen-vector method.
       for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
@@ -1515,8 +1106,8 @@ void TargetLoweringBase::computeRegisterProperties(
       if (IsLegalWiderType)
         break;
       LLVM_FALLTHROUGH;
-    }
-    case TypeWidenVector: {
+
+    case TypeWidenVector:
       // Try to widen the vector.
       for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
         MVT SVT = (MVT::SimpleValueType) nVT;
@@ -1533,7 +1124,7 @@ void TargetLoweringBase::computeRegisterProperties(
       if (IsLegalWiderType)
         break;
       LLVM_FALLTHROUGH;
-    }
+
     case TypeSplitVector:
     case TypeScalarizeVector: {
       MVT IntermediateVT;
@@ -1598,7 +1189,6 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
 /// This method returns the number of registers needed, and the VT for each
 /// register.  It also returns the VT and quantity of the intermediate values
 /// before they are promoted/expanded.
-///
 unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                 EVT &IntermediateVT,
                                                 unsigned &NumIntermediates,
@@ -1911,7 +1501,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
 /// by AM is legal for this target, for a load/store of the specified type.
 bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // The default implementation of this implements a conservative RISCy, r+r and
   // r+i addr mode.
 
@@ -2002,8 +1592,8 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
 /// Get the reciprocal estimate attribute string for a function that will
 /// override the target defaults.
 static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
-  const Function *F = MF.getFunction();
-  return F->getFnAttribute("reciprocal-estimates").getValueAsString();
+  const Function &F = MF.getFunction();
+  return F.getFnAttribute("reciprocal-estimates").getValueAsString();
 }
 
 /// Construct a string for the given reciprocal operation of the given type.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6922e33c8d6c..24d4baa31e1f 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -52,6 +52,7 @@
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -134,7 +135,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
                                                    ELF::SHT_PROGBITS, Flags, 0);
   unsigned Size = DL.getPointerSize();
   Streamer.SwitchSection(Sec);
-  Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
+  Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0));
   Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
   const MCExpr *E = MCConstantExpr::create(Size, getContext());
   Streamer.emitELFSize(Label, E);
@@ -168,8 +169,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
                                                            MMI, Streamer);
 }
 
-static SectionKind
-getELFKindForNamedSection(StringRef Name, SectionKind K) {
+static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
   // N.B.: The defaults used in here are no the same ones used in MC.
   // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
   // both gas and MC will produce a section with no flags. Given
@@ -531,10 +531,8 @@ static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
       Name = ".ctors";
     else
       Name = ".dtors";
-    if (Priority != 65535) {
-      Name += '.';
-      Name += utostr(65535 - Priority);
-    }
+    if (Priority != 65535)
+      raw_string_ostream(Name) << format(".%05u", 65535 - Priority);
     Type = ELF::SHT_PROGBITS;
   }
 
@@ -1213,16 +1211,38 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
   }
 }
 
+static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
+                                                   const Triple &T, bool IsCtor,
+                                                   unsigned Priority,
+                                                   const MCSymbol *KeySym,
+                                                   MCSectionCOFF *Default) {
+  if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment())
+    return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
+
+  std::string Name = IsCtor ? ".ctors" : ".dtors";
+  if (Priority != 65535)
+    raw_string_ostream(Name) << format(".%05u", 65535 - Priority);
+
+  return Ctx.getAssociativeCOFFSection(
+      Ctx.getCOFFSection(Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                   COFF::IMAGE_SCN_MEM_READ |
+                                   COFF::IMAGE_SCN_MEM_WRITE,
+                         SectionKind::getData()),
+      KeySym, 0);
+}
+
 MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
     unsigned Priority, const MCSymbol *KeySym) const {
-  return getContext().getAssociativeCOFFSection(
-      cast<MCSectionCOFF>(StaticCtorSection), KeySym, 0);
+  return getCOFFStaticStructorSection(getContext(), getTargetTriple(), true,
+                                      Priority, KeySym,
+                                      cast<MCSectionCOFF>(StaticCtorSection));
 }
 
 MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
     unsigned Priority, const MCSymbol *KeySym) const {
-  return getContext().getAssociativeCOFFSection(
-      cast<MCSectionCOFF>(StaticDtorSection), KeySym, 0);
+  return getCOFFStaticStructorSection(getContext(), getTargetTriple(), false,
+                                      Priority, KeySym,
+                                      cast<MCSectionCOFF>(StaticDtorSection));
 }
 
 void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
@@ -1234,32 +1254,40 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
 //                                  Wasm
 //===----------------------------------------------------------------------===//
 
-static const Comdat *getWasmComdat(const GlobalValue *GV) {
+static void checkWasmComdat(const GlobalValue *GV) {
   const Comdat *C = GV->getComdat();
   if (!C)
-    return nullptr;
+    return;
 
-  if (C->getSelectionKind() != Comdat::Any)
-    report_fatal_error("Wasm COMDATs only support SelectionKind::Any, '" +
-                       C->getName() + "' cannot be lowered.");
+  // TODO(sbc): At some point we may need COMDAT support but currently
+  // they are not supported.
+  report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() +
+                     "' cannot be lowered.");
+}
 
-  return C;
+static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) {
+  // If we're told we have function data, then use that.
+  if (K.isText())
+    return SectionKind::getText();
+
+  // Otherwise, ignore whatever section type the generic impl detected and use
+  // a plain data section.
+  return SectionKind::getData();
 }
 
 MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
-  llvm_unreachable("getExplicitSectionGlobal not yet implemented");
-  return nullptr;
+  StringRef Name = GO->getSection();
+  checkWasmComdat(GO);
+  Kind = getWasmKindForNamedSection(Name, Kind);
+  return getContext().getWasmSection(Name, Kind);
 }
 
-static MCSectionWasm *
-selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
-                           SectionKind Kind, Mangler &Mang,
-                           const TargetMachine &TM, bool EmitUniqueSection,
-                           unsigned Flags, unsigned *NextUniqueID) {
+static MCSectionWasm *selectWasmSectionForGlobal(
+    MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {
   StringRef Group = "";
-  if (getWasmComdat(GO))
-    llvm_unreachable("comdat not yet supported for wasm");
+  checkWasmComdat(GO);
 
   bool UniqueSectionNames = TM.getUniqueSectionNames();
   SmallString<128> Name = getSectionPrefixForGlobal(Kind);
@@ -1279,8 +1307,7 @@ selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
     UniqueID = *NextUniqueID;
     (*NextUniqueID)++;
   }
-  return Ctx.getWasmSection(Name, /*Type=*/0, Flags,
-                            Group, UniqueID);
+  return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
 }
 
 MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
@@ -1299,8 +1326,7 @@ MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
   EmitUniqueSection |= GO->hasComdat();
 
   return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
-                                    EmitUniqueSection, /*Flags=*/0,
-                                    &NextUniqueID);
+                                    EmitUniqueSection, &NextUniqueID);
 }
 
 bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
@@ -1330,7 +1356,21 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
       MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
 }
 
-void
-TargetLoweringObjectFileWasm::InitializeWasm() {
-  // TODO: Initialize StaticCtorSection and StaticDtorSection.
+void TargetLoweringObjectFileWasm::InitializeWasm() {
+  StaticCtorSection =
+      getContext().getWasmSection(".init_array", SectionKind::getData());
+}
+
+MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
+    unsigned Priority, const MCSymbol *KeySym) const {
+  return Priority == UINT16_MAX ?
+         StaticCtorSection :
+         getContext().getWasmSection(".init_array." + utostr(Priority),
+                                     SectionKind::getData());
+}
+
+MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
+    unsigned Priority, const MCSymbol *KeySym) const {
+  llvm_unreachable("@llvm.global_dtors should have been lowered already");
+  return nullptr;
 }
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index ed845e1706f8..853e71d0efa5 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -13,11 +13,11 @@
 
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 /// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -28,7 +28,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
     return true;
 
   // Check to see if we should eliminate non-leaf frame pointers.
-  if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+  if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf"))
     return MF.getFrameInfo().hasCalls();
 
   return false;
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 817e58ce59e1..121bed5a79cb 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -47,6 +47,9 @@
 
 using namespace llvm;
 
+cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
+                         cl::desc("Enable interprocedural register allocation "
+                                  "to reduce load/store at procedure calls."));
 static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
     cl::desc("Disable Post Regalloc Scheduler"));
 static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -90,7 +93,11 @@ static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inli
 static cl::opt<bool> EnableImplicitNullChecks(
     "enable-implicit-null-checks",
     cl::desc("Fold null checks into faulting memory operations"),
-    cl::init(false));
+    cl::init(false), cl::Hidden);
+static cl::opt<bool>
+    EnableMergeICmps("enable-mergeicmps",
+                     cl::desc("Merge ICmp chains into a single memcmp"),
+                     cl::init(false), cl::Hidden);
 static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
     cl::desc("Print LLVM IR produced by the loop-reduce pass"));
 static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
@@ -104,6 +111,11 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
 static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
     cl::Hidden,
     cl::desc("Enable machine outliner"));
+static cl::opt<bool> EnableLinkOnceODROutlining(
+    "enable-linkonceodr-outlining",
+    cl::Hidden,
+    cl::desc("Enable the machine outliner on linkonceodr functions"),
+    cl::init(false));
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
@@ -115,10 +127,9 @@ static cl::opt<cl::boolOrDefault>
     EnableGlobalISel("global-isel", cl::Hidden,
                      cl::desc("Enable the \"global\" instruction selector"));
 
-static cl::opt<std::string>
-PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
-                   cl::desc("Print machine instrs"),
-                   cl::value_desc("pass-name"), cl::init("option-unspecified"));
+static cl::opt<std::string> PrintMachineInstrs(
+    "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
+    cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
 
 static cl::opt<int> EnableGlobalISelAbort(
     "global-isel-abort", cl::Hidden,
@@ -153,6 +164,34 @@ static cl::opt<CFLAAType> UseCFLAA(
                clEnumValN(CFLAAType::Both, "both", 
                           "Enable both variants of CFL-AA")));
 
+/// Option names for limiting the codegen pipeline.
+/// Those are used in error reporting and we didn't want
+/// to duplicate their names all over the place.
+const char *StartAfterOptName = "start-after";
+const char *StartBeforeOptName = "start-before";
+const char *StopAfterOptName = "stop-after";
+const char *StopBeforeOptName = "stop-before";
+
+static cl::opt<std::string>
+    StartAfterOpt(StringRef(StartAfterOptName),
+                  cl::desc("Resume compilation after a specific pass"),
+                  cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+    StartBeforeOpt(StringRef(StartBeforeOptName),
+                   cl::desc("Resume compilation before a specific pass"),
+                   cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+    StopAfterOpt(StringRef(StopAfterOptName),
+                 cl::desc("Stop compilation after a specific pass"),
+                 cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+    StopBeforeOpt(StringRef(StopBeforeOptName),
+                  cl::desc("Stop compilation before a specific pass"),
+                  cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -282,6 +321,37 @@ TargetPassConfig::~TargetPassConfig() {
   delete Impl;
 }
 
+static const PassInfo *getPassInfo(StringRef PassName) {
+  if (PassName.empty())
+    return nullptr;
+
+  const PassRegistry &PR = *PassRegistry::getPassRegistry();
+  const PassInfo *PI = PR.getPassInfo(PassName);
+  if (!PI)
+    report_fatal_error(Twine('\"') + Twine(PassName) +
+                       Twine("\" pass is not registered."));
+  return PI;
+}
+
+static AnalysisID getPassIDFromName(StringRef PassName) {
+  const PassInfo *PI = getPassInfo(PassName);
+  return PI ? PI->getTypeInfo() : nullptr;
+}
+
+void TargetPassConfig::setStartStopPasses() {
+  StartBefore = getPassIDFromName(StartBeforeOpt);
+  StartAfter = getPassIDFromName(StartAfterOpt);
+  StopBefore = getPassIDFromName(StopBeforeOpt);
+  StopAfter = getPassIDFromName(StopAfterOpt);
+  if (StartBefore && StartAfter)
+    report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+                       Twine(StartAfterOptName) + Twine(" specified!"));
+  if (StopBefore && StopAfter)
+    report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+                       Twine(StopAfterOptName) + Twine(" specified!"));
+  Started = (StartAfter == nullptr) && (StartBefore == nullptr);
+}
+
 // Out of line constructor provides default values for pass options and
 // registers all common codegen passes.
 TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
@@ -303,8 +373,17 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
   if (StringRef(PrintMachineInstrs.getValue()).equals(""))
     TM.Options.PrintMachineCode = true;
 
+  if (EnableIPRA.getNumOccurrences())
+    TM.Options.EnableIPRA = EnableIPRA;
+  else {
+    // If not explicitly specified, use target default.
+    TM.Options.EnableIPRA = TM.useIPRA();
+  }
+
   if (TM.Options.EnableIPRA)
     setRequiresCodeGenSCCOrder();
+
+  setStartStopPasses();
 }
 
 CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
@@ -339,6 +418,30 @@ TargetPassConfig::TargetPassConfig()
                      "triple set?");
 }
 
+bool TargetPassConfig::hasLimitedCodeGenPipeline() const {
+  return StartBefore || StartAfter || StopBefore || StopAfter;
+}
+
+std::string
+TargetPassConfig::getLimitedCodeGenPipelineReason(const char *Separator) const {
+  if (!hasLimitedCodeGenPipeline())
+    return std::string();
+  std::string Res;
+  static cl::opt<std::string> *PassNames[] = {&StartAfterOpt, &StartBeforeOpt,
+                                              &StopAfterOpt, &StopBeforeOpt};
+  static const char *OptNames[] = {StartAfterOptName, StartBeforeOptName,
+                                   StopAfterOptName, StopBeforeOptName};
+  bool IsFirst = true;
+  for (int Idx = 0; Idx < 4; ++Idx)
+    if (!PassNames[Idx]->empty()) {
+      if (!IsFirst)
+        Res += Separator;
+      IsFirst = false;
+      Res += OptNames[Idx];
+    }
+  return Res;
+}
+
 // Helper to verify the analysis is really immutable.
 void TargetPassConfig::setOpt(bool &Opt, bool Val) {
   assert(!Initialized && "PassConfig is immutable");
@@ -496,6 +599,16 @@ void TargetPassConfig::addIRPasses() {
       addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
   }
 
+  if (getOptLevel() != CodeGenOpt::None) {
+    // The MergeICmpsPass tries to create memcmp calls by grouping sequences of
+    // loads and compares. ExpandMemCmpPass then tries to expand those calls
+    // into optimally-sized loads and compares. The transforms are enabled by a
+    // target lowering hook.
+    if (EnableMergeICmps)
+      addPass(createMergeICmpsPass());
+    addPass(createExpandMemCmpPass());
+  }
+
   // Run GC lowering passes for builtin collectors
   // TODO: add a pass insertion point here
   addPass(createGCLoweringPass());
@@ -511,8 +624,8 @@ void TargetPassConfig::addIRPasses() {
   if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
     addPass(createPartiallyInlineLibCallsPass());
 
-  // Insert calls to mcount-like functions.
-  addPass(createCountingFunctionInserterPass());
+  // Instrument function entry and exit, e.g. with calls to mcount().
+  addPass(createPostInlineEntryExitInstrumenterPass());
 
   // Add scalarization of target's unsupported masked memory intrinsics pass.
   // the unsupported intrinsic will be replaced with a chain of basic blocks,
@@ -653,10 +766,9 @@ bool TargetPassConfig::addISelPasses() {
 /// -regalloc=... command line option.
 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
 static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
-               RegisterPassParser<RegisterRegAlloc> >
-RegAlloc("regalloc",
-         cl::init(&useDefaultRegisterAllocator),
-         cl::desc("Register allocator to use"));
+               RegisterPassParser<RegisterRegAlloc>>
+    RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
+             cl::desc("Register allocator to use"));
 
 /// Add the complete set of target-independent postISel code generator passes.
 ///
@@ -694,9 +806,6 @@ void TargetPassConfig::addMachinePasses() {
   // Print the instruction selected machine code...
   printAndVerify("After Instruction Selection");
 
-  if (TM->Options.EnableIPRA)
-    addPass(createRegUsageInfoPropPass());
-
   // Expand pseudo-instructions emitted by ISel.
   addPass(&ExpandISelPseudosID);
 
@@ -709,6 +818,9 @@ void TargetPassConfig::addMachinePasses() {
     addPass(&LocalStackSlotAllocationID, false);
   }
 
+  if (TM->Options.EnableIPRA)
+    addPass(createRegUsageInfoPropPass());
+
   // Run pre-ra passes.
   addPreRegAlloc();
 
@@ -788,7 +900,7 @@ void TargetPassConfig::addMachinePasses() {
   addPass(&PatchableFunctionID, false);
 
   if (EnableMachineOutliner)
-    PM->add(createMachineOutlinerPass());
+    PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining));
 
   AddingMachinePasses = false;
 }
@@ -824,9 +936,6 @@ void TargetPassConfig::addMachineSSAOptimization() {
   addPass(&MachineLICMID, false);
   addPass(&MachineCSEID, false);
 
-  // Coalesce basic blocks with the same branch condition
-  addPass(&BranchCoalescingID);
-
   addPass(&MachineSinkingID);
 
   addPass(&PeepholeOptimizerID);
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index eeb00a784b0d..f03c3b8300f3 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -11,13 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
@@ -27,9 +31,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cassert>
 #include <utility>
 
@@ -41,11 +42,14 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
                              regclass_iterator RCB, regclass_iterator RCE,
                              const char *const *SRINames,
                              const LaneBitmask *SRILaneMasks,
-                             LaneBitmask SRICoveringLanes)
+                             LaneBitmask SRICoveringLanes,
+                             const RegClassInfo *const RCIs,
+                             unsigned Mode)
   : InfoDesc(ID), SubRegIndexNames(SRINames),
     SubRegIndexLaneMasks(SRILaneMasks),
     RegClassBegin(RCB), RegClassEnd(RCE),
-    CoveringLanes(SRICoveringLanes) {
+    CoveringLanes(SRICoveringLanes),
+    RCInfos(RCIs), HwMode(Mode) {
 }
 
 TargetRegisterInfo::~TargetRegisterInfo() = default;
@@ -65,8 +69,8 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
       continue;
     for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
       if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) {
-        dbgs() << "Error: Super register " << PrintReg(*SR, this)
-               << " of reserved register " << PrintReg(Reg, this)
+        dbgs() << "Error: Super register " << printReg(*SR, this)
+               << " of reserved register " << printReg(Reg, this)
                << " is not reserved.\n";
         return false;
       }
@@ -81,7 +85,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
 
 namespace llvm {
 
-Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
+Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI,
                    unsigned SubIdx) {
   return Printable([Reg, TRI, SubIdx](raw_ostream &OS) {
     if (!Reg)
@@ -89,11 +93,15 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
     else if (TargetRegisterInfo::isStackSlot(Reg))
       OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
     else if (TargetRegisterInfo::isVirtualRegister(Reg))
-      OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
-    else if (TRI && Reg < TRI->getNumRegs())
-      OS << '%' << TRI->getName(Reg);
-    else
-      OS << "%physreg" << Reg;
+      OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+    else if (!TRI)
+      OS << '%' << "physreg" << Reg;
+    else if (Reg < TRI->getNumRegs()) {
+      OS << '%';
+      printLowerCase(TRI->getName(Reg), OS);
+    } else
+      llvm_unreachable("Register kind is unsupported.");
+
     if (SubIdx) {
       if (TRI)
         OS << ':' << TRI->getSubRegIndexName(SubIdx);
@@ -103,7 +111,7 @@ Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
   });
 }
 
-Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
   return Printable([Unit, TRI](raw_ostream &OS) {
     // Generic printout when TRI is missing.
     if (!TRI) {
@@ -126,12 +134,27 @@ Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
   });
 }
 
-Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
   return Printable([Unit, TRI](raw_ostream &OS) {
     if (TRI && TRI->isVirtualRegister(Unit)) {
-      OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+      OS << '%' << TargetRegisterInfo::virtReg2Index(Unit);
     } else {
-      OS << PrintRegUnit(Unit, TRI);
+      OS << printRegUnit(Unit, TRI);
+    }
+  });
+}
+
+Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo,
+                              const TargetRegisterInfo *TRI) {
+  return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) {
+    if (RegInfo.getRegClassOrNull(Reg))
+      OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+    else if (RegInfo.getRegBankOrNull(Reg))
+      OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
+    else {
+      OS << "_";
+      assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
+             "Generic registers must have a valid type");
     }
   });
 }
@@ -357,7 +380,7 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
 }
 
 // Compute target-independent register allocator hints to help eliminate copies.
-void
+bool
 TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                           ArrayRef<MCPhysReg> Order,
                                           SmallVectorImpl<MCPhysReg> &Hints,
@@ -365,49 +388,55 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                           const VirtRegMap *VRM,
                                           const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
-
-  // Hints with HintType != 0 were set by target-dependent code.
-  // Such targets must provide their own implementation of
-  // TRI::getRegAllocationHints to interpret those hint types.
-  assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
-
-  // Target-independent hints are either a physical or a virtual register.
-  unsigned Phys = Hint.second;
-  if (VRM && isVirtualRegister(Phys))
-    Phys = VRM->getPhys(Phys);
-
-  // Check that Phys is a valid hint in VirtReg's register class.
-  if (!isPhysicalRegister(Phys))
-    return;
-  if (MRI.isReserved(Phys))
-    return;
-  // Check that Phys is in the allocation order. We shouldn't heed hints
-  // from VirtReg's register class if they aren't in the allocation order. The
-  // target probably has a reason for removing the register.
-  if (!is_contained(Order, Phys))
-    return;
-
-  // All clear, tell the register allocator to prefer this register.
-  Hints.push_back(Phys);
+  const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
+    MRI.getRegAllocationHints(VirtReg);
+
+  // First hint may be a target hint.
+  bool Skip = (Hints_MRI.first != 0);
+  for (auto Reg : Hints_MRI.second) {
+    if (Skip) {
+      Skip = false;
+      continue;
+    }
+
+    // Target-independent hints are either a physical or a virtual register.
+    unsigned Phys = Reg;
+    if (VRM && isVirtualRegister(Phys))
+      Phys = VRM->getPhys(Phys);
+
+    // Check that Phys is a valid hint in VirtReg's register class.
+    if (!isPhysicalRegister(Phys))
+      continue;
+    if (MRI.isReserved(Phys))
+      continue;
+    // Check that Phys is in the allocation order. We shouldn't heed hints
+    // from VirtReg's register class if they aren't in the allocation order. The
+    // target probably has a reason for removing the register.
+    if (!is_contained(Order, Phys))
+      continue;
+
+    // All clear, tell the register allocator to prefer this register.
+    Hints.push_back(Phys);
+  }
+  return false;
 }
 
 bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
-  return !MF.getFunction()->hasFnAttribute("no-realign-stack");
+  return !MF.getFunction().hasFnAttribute("no-realign-stack");
 }
 
 bool TargetRegisterInfo::needsStackRealignment(
     const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-  const Function *F = MF.getFunction();
+  const Function &F = MF.getFunction();
   unsigned StackAlign = TFI->getStackAlignment();
   bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) ||
-                              F->hasFnAttribute(Attribute::StackAlignment));
-  if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
+                              F.hasFnAttribute(Attribute::StackAlignment));
+  if (F.hasFnAttribute("stackrealign") || requiresRealignment) {
     if (canRealignStack(MF))
       return true;
-    DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n");
+    DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n");
   }
   return false;
 }
@@ -425,6 +454,6 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
 LLVM_DUMP_METHOD
 void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
                                  const TargetRegisterInfo *TRI) {
-  dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
+  dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n";
 }
 #endif
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 9210ea8a83f6..86dbf1b2aeab 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -16,15 +16,15 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -316,7 +316,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
   // correctly append imp-use operands, and readsReg() strangely returns false
   // for predicated defs.
   unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
-  const MachineFunction &MF = *DefMI->getParent()->getParent();
+  const MachineFunction &MF = *DefMI->getMF();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
     return computeInstrLatency(DefMI);
@@ -339,42 +339,46 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
 static Optional<double>
 getRThroughputFromItineraries(unsigned schedClass,
                               const InstrItineraryData *IID){
-  double Unknown = std::numeric_limits<double>::infinity();
-  double Throughput = Unknown;
+  Optional<double> Throughput;
 
   for (const InstrStage *IS = IID->beginStage(schedClass),
                         *E = IID->endStage(schedClass);
        IS != E; ++IS) {
-    unsigned Cycles = IS->getCycles();
-    if (!Cycles)
-      continue;
-    Throughput =
-        std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles);
+    if (IS->getCycles()) {
+      double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles();
+      Throughput = Throughput.hasValue()
+                        ? std::min(Throughput.getValue(), Temp)
+                        : Temp;
+    }
   }
-  // We need reciprocal throughput that's why we return such value.
-  return 1 / Throughput;
+  if (Throughput.hasValue())
+    // We need reciprocal throughput that's why we return such value.
+    return 1 / Throughput.getValue();
+  return Throughput;
 }
 
 static Optional<double>
 getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
                                   const TargetSubtargetInfo *STI,
                                   const MCSchedModel &SchedModel) {
-  double Unknown = std::numeric_limits<double>::infinity();
-  double Throughput = Unknown;
+  Optional<double> Throughput;
 
   for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
                                  *WEnd = STI->getWriteProcResEnd(SCDesc);
        WPR != WEnd; ++WPR) {
-    unsigned Cycles = WPR->Cycles;
-    if (!Cycles)
-      return Optional<double>();
-
-    unsigned NumUnits =
-        SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
-    Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
+    if (WPR->Cycles) {
+      unsigned NumUnits =
+          SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
+      double Temp = NumUnits * 1.0 / WPR->Cycles;
+      Throughput = Throughput.hasValue()
+                       ? std::min(Throughput.getValue(), Temp)
+                       : Temp;
+    }
   }
-  // We need reciprocal throughput that's why we return such value.
-  return 1 / Throughput;
+  if (Throughput.hasValue())
+    // We need reciprocal throughput that's why we return such value.
+    return 1 / Throughput.getValue();
+  return Throughput;
 }
 
 Optional<double>
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index f6d5bc80ddff..1a317cd865f0 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,11 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
@@ -50,6 +51,10 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
   return true;
 }
 
+bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
+  return false;
+}
+
 bool TargetSubtargetInfo::enablePostRAScheduler() const {
   return getSchedModel().PostRAScheduler;
 }
@@ -93,9 +98,15 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
   // that could be changed during the compilation
   TargetSchedModel TSchedModel;
   TSchedModel.init(getSchedModel(), this, getInstrInfo());
-  if (!TSchedModel.hasInstrSchedModel())
+  unsigned Latency;
+  if (TSchedModel.hasInstrSchedModel())
+    Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+  else if (TSchedModel.hasInstrItineraries()) {
+    auto *ItinData = TSchedModel.getInstrItineraries();
+    Latency = ItinData->getStageLatency(
+        getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
+  } else
     return std::string();
-  unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
   Optional<double> RThroughput =
       TSchedModel.computeInstrRThroughput(MCI.getOpcode());
   return createSchedInfoStr(Latency, RThroughput);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 83c00e24d14f..774b76f84b7f 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1,4 +1,4 @@
-//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -28,27 +28,40 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
 
 using namespace llvm;
 
@@ -76,6 +89,7 @@ static cl::opt<unsigned> MaxDataFlowEdge(
              "the benefit of commuting operands"));
 
 namespace {
+
 class TwoAddressInstructionPass : public MachineFunctionPass {
   MachineFunction *MF;
   const TargetInstrInfo *TII;
@@ -96,6 +110,10 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   // Set of already processed instructions in the current block.
   SmallPtrSet<MachineInstr*, 8> Processed;
 
+  // Set of instructions converted to three-address by target and then sunk
+  // down current basic block.
+  SmallPtrSet<MachineInstr*, 8> SunkInstrs;
+
   // A map from virtual registers to physical registers which are likely targets
   // to be coalesced to due to copies from physical registers to virtual
   // registers. e.g. v1024 = move r0.
@@ -148,14 +166,16 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
 
   void processCopy(MachineInstr *MI);
 
-  typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
-  typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+  using TiedPairList = SmallVector<std::pair<unsigned, unsigned>, 4>;
+  using TiedOperandMap = SmallDenseMap<unsigned, TiedPairList>;
+
   bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
   void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
   void eliminateRegSequence(MachineBasicBlock::iterator&);
 
 public:
   static char ID; // Pass identification, replacement for typeid
+
   TwoAddressInstructionPass() : MachineFunctionPass(ID) {
     initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
   }
@@ -175,17 +195,19 @@ public:
   /// Pass entry point.
   bool runOnMachineFunction(MachineFunction&) override;
 };
+
 } // end anonymous namespace
 
 char TwoAddressInstructionPass::ID = 0;
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
 INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE,
                 "Two-Address instruction pass", false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
                 "Two-Address instruction pass", false, false)
 
-char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
-
 static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
 
 /// A two-address instruction has been converted to a three-address instruction
@@ -267,7 +289,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
   ++KillPos;
 
   unsigned NumVisited = 0;
-  for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) {
+  for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) {
     // DBG_VALUE cannot be counted against the limit.
     if (OtherMI.isDebugValue())
       continue;
@@ -436,8 +458,8 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
 /// For example, in this code:
 ///
 ///   %reg1034 = copy %reg1024
-///   %reg1035 = copy %reg1025<kill>
-///   %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///   %reg1035 = copy killed %reg1025
+///   %reg1036 = add killed %reg1034, killed %reg1035
 ///
 /// %reg1034 is not considered to be killed, since it is copied from a
 /// register which is not killed. Treating it as not killed lets the
@@ -452,7 +474,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
                      LiveIntervals *LIS,
                      bool allowFalsePositives) {
   MachineInstr *DefMI = &MI;
-  for (;;) {
+  while (true) {
     // All uses of physical registers are likely to be kills.
     if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
         (allowFalsePositives || MRI->hasOneUse(Reg)))
@@ -569,31 +591,31 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
   // general, we want no uses between this instruction and the definition of
   // the two-address register.
   // e.g.
-  // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
-  // %reg1029<def> = MOV8rr %reg1028
-  // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
-  // insert => %reg1030<def> = MOV8rr %reg1028
-  // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+  // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
+  // %reg1029 = MOV8rr %reg1028
+  // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
+  // insert => %reg1030 = MOV8rr %reg1028
+  // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
   // In this case, it might not be possible to coalesce the second MOV8rr
   // instruction if the first one is coalesced. So it would be profitable to
   // commute it:
-  // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
-  // %reg1029<def> = MOV8rr %reg1028
-  // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
-  // insert => %reg1030<def> = MOV8rr %reg1029
-  // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+  // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
+  // %reg1029 = MOV8rr %reg1028
+  // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
+  // insert => %reg1030 = MOV8rr %reg1029
+  // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
 
   if (!isPlainlyKilled(MI, regC, LIS))
     return false;
 
   // Ok, we have something like:
-  // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+  // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
   // let's see if it's worth commuting it.
 
   // Look for situations like this:
-  // %reg1024<def> = MOV r1
-  // %reg1025<def> = MOV r0
-  // %reg1026<def> = ADD %reg1024, %reg1025
+  // %reg1024 = MOV r1
+  // %reg1025 = MOV r0
+  // %reg1026 = ADD %reg1024, %reg1025
   // r0            = MOV %reg1026
   // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
   unsigned ToRegA = getMappedReg(regA, DstRegMap);
@@ -691,9 +713,9 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
 bool
 TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
   // Look for situations like this:
-  // %reg1024<def> = MOV r1
-  // %reg1025<def> = MOV r0
-  // %reg1026<def> = ADD %reg1024, %reg1025
+  // %reg1024 = MOV r1
+  // %reg1025 = MOV r0
+  // %reg1026 = ADD %reg1024, %reg1025
   // r2            = MOV %reg1026
   // Turn ADD into a 3-address instruction to avoid a copy.
   unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
@@ -738,6 +760,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
     mi = NewMI;
     nmi = std::next(mi);
   }
+  else
+    SunkInstrs.insert(NewMI);
 
   // Update source and destination register maps.
   SrcRegMap.erase(RegA);
@@ -904,7 +928,6 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
   // Move the copies connected to MI down as well.
   MachineBasicBlock::iterator Begin = MI;
   MachineBasicBlock::iterator AfterMI = std::next(Begin);
-
   MachineBasicBlock::iterator End = AfterMI;
   while (End->isCopy() &&
          regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
@@ -916,7 +939,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
   unsigned NumVisited = 0;
   MachineBasicBlock::iterator KillPos = KillMI;
   ++KillPos;
-  for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) {
+  for (MachineInstr &OtherMI : make_range(End, KillPos)) {
     // DBG_VALUE cannot be counted against the limit.
     if (OtherMI.isDebugValue())
       continue;
@@ -1090,7 +1113,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
   // Check if the reschedule will not break depedencies.
   unsigned NumVisited = 0;
   for (MachineInstr &OtherMI :
-       llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) {
+       make_range(mi, MachineBasicBlock::iterator(KillMI))) {
     // DBG_VALUE cannot be counted against the limit.
     if (OtherMI.isDebugValue())
       continue;
@@ -1443,7 +1466,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
 
     assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
 
-    // Deal with <undef> uses immediately - simply rewrite the src operand.
+    // Deal with undef uses immediately - simply rewrite the src operand.
     if (SrcMO.isUndef() && !DstMO.getSubReg()) {
       // Constrain the DstReg register class if required.
       if (TargetRegisterInfo::isVirtualRegister(DstReg))
@@ -1609,7 +1632,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
       if (I->end == UseIdx)
         LI.removeSegment(LastCopyIdx, UseIdx);
     }
-
   } else if (RemovedKillFlag) {
     // Some tied uses of regB matched their destination registers, so
     // regB is still used in this instruction, but a kill flag was
@@ -1639,6 +1661,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
   else
     AA = nullptr;
   OptLevel = TM.getOptLevel();
+  // Disable optimizations if requested. We cannot skip the whole pass as some
+  // fixups are necessary for correctness.
+  if (skipFunction(Func.getFunction()))
+    OptLevel = CodeGenOpt::None;
 
   bool MadeChange = false;
 
@@ -1658,10 +1684,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
     SrcRegMap.clear();
     DstRegMap.clear();
     Processed.clear();
+    SunkInstrs.clear();
     for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
          mi != me; ) {
       MachineBasicBlock::iterator nmi = std::next(mi);
-      if (mi->isDebugValue()) {
+      // Don't revisit an instruction previously converted by target. It may
+      // contain undef register operands (%noreg), which are not handled.
+      if (mi->isDebugValue() || SunkInstrs.count(&*mi)) {
         mi = nmi;
         continue;
       }
@@ -1690,7 +1719,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
       // transformations that may either eliminate the tied operands or
       // improve the opportunities for coalescing away the register copy.
       if (TiedOperands.size() == 1) {
-        SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs
+        SmallVectorImpl<std::pair<unsigned, unsigned>> &TiedPairs
           = TiedOperands.begin()->second;
         if (TiedPairs.size() == 1) {
           unsigned SrcIdx = TiedPairs[0].first;
@@ -1749,9 +1778,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
 ///
 /// Becomes:
 ///
-///   %dst:ssub0<def,undef> = COPY %v1
-///   %dst:ssub1<def> = COPY %v2
-///
+///   undef %dst:ssub0 = COPY %v1
+///   %dst:ssub1 = COPY %v2
 void TwoAddressInstructionPass::
 eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
@@ -1775,7 +1803,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
     MachineOperand &UseMO = MI.getOperand(i);
     unsigned SrcReg = UseMO.getReg();
     unsigned SubIdx = MI.getOperand(i+1).getImm();
-    // Nothing needs to be inserted for <undef> operands.
+    // Nothing needs to be inserted for undef operands.
     if (UseMO.isUndef())
       continue;
 
@@ -1797,7 +1825,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
                                .addReg(DstReg, RegState::Define, SubIdx)
                                .add(UseMO);
 
-    // The first def needs an <undef> flag because there is no live register
+    // The first def needs an undef flag because there is no live register
     // before it.
     if (!DefEmitted) {
       CopyMI->getOperand(0).setIsUndef(true);
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 407fd9b162e9..5288ca672774 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Dominators.h"
@@ -37,7 +38,6 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 static bool eliminateUnreachableBlock(Function &F) {
@@ -207,11 +207,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
           MachineRegisterInfo &MRI = F.getRegInfo();
           unsigned InputSub = Input.getSubReg();
           if (InputSub == 0 &&
-              MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
+              MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg)) &&
+              !Input.isUndef()) {
             MRI.replaceRegWith(OutputReg, InputReg);
           } else {
             // The input register to the PHI has a subregister or it can't be
-            // constrained to the proper register class:
+            // constrained to the proper register class or it is undef:
             // insert a COPY instead of simply replacing the output
             // with the input.
             const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index f8aacdb8649d..64bb37a280a6 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,24 +18,32 @@
 
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "LiveDebugVariables.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "regalloc"
@@ -132,8 +140,8 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
     if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
-      OS << '[' << PrintReg(Reg, TRI) << " -> "
-         << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+      OS << '[' << printReg(Reg, TRI) << " -> "
+         << printReg(Virt2PhysMap[Reg], TRI) << "] "
          << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
     }
   }
@@ -141,7 +149,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
     if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
-      OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+      OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
          << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
     }
   }
@@ -164,9 +172,9 @@ LLVM_DUMP_METHOD void VirtRegMap::dump() const {
 // according to LiveIntervals.
 //
 namespace {
+
 class VirtRegRewriter : public MachineFunctionPass {
   MachineFunction *MF;
-  const TargetMachine *TM;
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
   MachineRegisterInfo *MRI;
@@ -184,18 +192,23 @@ class VirtRegRewriter : public MachineFunctionPass {
 
 public:
   static char ID;
+
   VirtRegRewriter() : MachineFunctionPass(ID) {}
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
   bool runOnMachineFunction(MachineFunction&) override;
+
   MachineFunctionProperties getSetProperties() const override {
     return MachineFunctionProperties().set(
         MachineFunctionProperties::Property::NoVRegs);
   }
 };
+
 } // end anonymous namespace
 
+char VirtRegRewriter::ID = 0;
+
 char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
 
 INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
@@ -208,8 +221,6 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
 INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
                     "Virtual Register Rewriter", false, false)
 
-char VirtRegRewriter::ID = 0;
-
 void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<LiveIntervals>();
@@ -224,7 +235,6 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
   MF = &fn;
-  TM = &MF->getTarget();
   TRI = MF->getSubtarget().getRegisterInfo();
   TII = MF->getSubtarget().getInstrInfo();
   MRI = &MF->getRegInfo();
@@ -260,8 +270,9 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
   assert(!LI.empty());
   assert(LI.hasSubRanges());
 
-  typedef std::pair<const LiveInterval::SubRange *,
-                    LiveInterval::const_iterator> SubRangeIteratorPair;
+  using SubRangeIteratorPair =
+      std::pair<const LiveInterval::SubRange *, LiveInterval::const_iterator>;
+
   SmallVector<SubRangeIteratorPair, 4> SubRanges;
   SlotIndex First;
   SlotIndex Last;
@@ -369,8 +380,8 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
   ++NumIdCopies;
 
   // Copies like:
-  //    %R0 = COPY %R0<undef>
-  //    %AL = COPY %AL, %EAX<imp-def>
+  //    %r0 = COPY undef %r0
+  //    %al = COPY %al, implicit-def %eax
   // give us additional liveness information: The target (super-)register
   // must not be valid before this point. Replace the COPY with a KILL
   // instruction to maintain this information.
@@ -477,7 +488,7 @@ void VirtRegRewriter::rewrite() {
         if (SubReg != 0) {
           if (NoSubRegLiveness) {
             // A virtual register kill refers to the whole register, so we may
-            // have to add <imp-use,kill> operands for the super-register.  A
+            // have to add implicit killed operands for the super-register.  A
             // partial redef always kills and redefines the super-register.
             if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
                 (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
@@ -502,9 +513,9 @@ void VirtRegRewriter::rewrite() {
             }
           }
 
-          // The <def,undef> and <def,internal> flags only make sense for
+          // The def undef and def internal flags only make sense for
           // sub-register defs, and we are substituting a full physreg.  An
-          // <imp-use,kill> operand from the SuperKills list will represent the
+          // implicit killed operand from the SuperKills list will represent the
           // partial read of the super-register.
           if (MO.isDef()) {
             MO.setIsUndef(false);
@@ -519,6 +530,7 @@ void VirtRegRewriter::rewrite() {
         // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
         // we need the inlining here.
         MO.setReg(PhysReg);
+        MO.setIsRenamableIfNoExtraRegAllocReq();
       }
 
       // Add any missing super-register kills after rewriting the whole
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index c63a0a9e60ea..7ad84734203d 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -1014,6 +1014,7 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
   removeUnreachableBlocks(F);
 }
 
+#ifndef NDEBUG
 void WinEHPrepare::verifyPreparedFunclets(Function &F) {
   for (BasicBlock &BB : F) {
     size_t NumColors = BlockColors[&BB].size();
@@ -1026,6 +1027,7 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
            "EH Pad still has a PHI!");
   }
 }
+#endif
 
 bool WinEHPrepare::prepareExplicitEH(Function &F) {
   // Remove unreachable blocks.  It is not valuable to assign them a color and
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 0b4c6e551667..3d83afcf1fc5 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -14,8 +14,8 @@
 //
 //===---------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -23,17 +23,26 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
 namespace {
 
+struct InstrumentationOptions {
+  // Whether to emit PATCHABLE_TAIL_CALL.
+  bool HandleTailcall;
+
+  // Whether to emit PATCHABLE_RET/PATCHABLE_FUNCTION_EXIT for all forms of
+  // return, e.g. conditional return.
+  bool HandleAllReturns;
+};
+
 struct XRayInstrumentation : public MachineFunctionPass {
   static char ID;
 
@@ -59,7 +68,8 @@ private:
   // This is the approach to go on CPUs which have a single RET instruction,
   //   like x86/x86_64.
   void replaceRetWithPatchableRet(MachineFunction &MF,
-                                  const TargetInstrInfo *TII);
+                                  const TargetInstrInfo *TII,
+                                  InstrumentationOptions);
 
   // Prepend the original return instruction with the exit sled code ("patchable
   //   function exit" pseudo-instruction), preserving the original return
@@ -70,25 +80,28 @@ private:
   //   have to call the trampoline and return from it to the original return
   //   instruction of the function being instrumented.
   void prependRetWithPatchableExit(MachineFunction &MF,
-                                   const TargetInstrInfo *TII);
+                                   const TargetInstrInfo *TII,
+                                   InstrumentationOptions);
 };
 
 } // end anonymous namespace
 
 void XRayInstrumentation::replaceRetWithPatchableRet(
-    MachineFunction &MF, const TargetInstrInfo *TII) {
+    MachineFunction &MF, const TargetInstrInfo *TII,
+    InstrumentationOptions op) {
   // We look for *all* terminators and returns, then replace those with
   // PATCHABLE_RET instructions.
   SmallVector<MachineInstr *, 4> Terminators;
   for (auto &MBB : MF) {
     for (auto &T : MBB.terminators()) {
       unsigned Opc = 0;
-      if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+      if (T.isReturn() &&
+          (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) {
         // Replace return instructions with:
         //   PATCHABLE_RET <Opcode>, <Operand>...
         Opc = TargetOpcode::PATCHABLE_RET;
       }
-      if (TII->isTailCall(T)) {
+      if (TII->isTailCall(T) && op.HandleTailcall) {
         // Treat the tail call as a return instruction, which has a
         // different-looking sled than the normal return case.
         Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
@@ -108,14 +121,16 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
 }
 
 void XRayInstrumentation::prependRetWithPatchableExit(
-    MachineFunction &MF, const TargetInstrInfo *TII) {
-  for (auto &MBB : MF) {
+    MachineFunction &MF, const TargetInstrInfo *TII,
+    InstrumentationOptions op) {
+  for (auto &MBB : MF)
     for (auto &T : MBB.terminators()) {
       unsigned Opc = 0;
-      if (T.isReturn()) {
+      if (T.isReturn() &&
+          (op.HandleAllReturns || T.getOpcode() == TII->getReturnOpcode())) {
         Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT;
       }
-      if (TII->isTailCall(T)) {
+      if (TII->isTailCall(T) && op.HandleTailcall) {
         Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
       }
       if (Opc != 0) {
@@ -124,11 +139,10 @@ void XRayInstrumentation::prependRetWithPatchableExit(
         BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc));
       }
     }
-  }
 }
 
 bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
-  auto &F = *MF.getFunction();
+  auto &F = MF.getFunction();
   auto InstrAttr = F.getFnAttribute("function-instrument");
   bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
                           InstrAttr.isStringAttribute() &&
@@ -143,7 +157,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
 
     // Count the number of MachineInstr`s in MachineFunction
     int64_t MICount = 0;
-    for (const auto& MBB : MF)
+    for (const auto &MBB : MF)
       MICount += MBB.size();
 
     // Check if we have a loop.
@@ -180,20 +194,35 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
   case Triple::ArchType::arm:
   case Triple::ArchType::thumb:
   case Triple::ArchType::aarch64:
-  case Triple::ArchType::ppc64le:
   case Triple::ArchType::mips:
   case Triple::ArchType::mipsel:
   case Triple::ArchType::mips64:
-  case Triple::ArchType::mips64el:
+  case Triple::ArchType::mips64el: {
     // For the architectures which don't have a single return instruction
-    prependRetWithPatchableExit(MF, TII);
+    InstrumentationOptions op;
+    op.HandleTailcall = false;
+    op.HandleAllReturns = true;
+    prependRetWithPatchableExit(MF, TII, op);
+    break;
+  }
+  case Triple::ArchType::ppc64le: {
+    // PPC has conditional returns. Turn them into branch and plain returns.
+    InstrumentationOptions op;
+    op.HandleTailcall = false;
+    op.HandleAllReturns = true;
+    replaceRetWithPatchableRet(MF, TII, op);
     break;
-  default:
+  }
+  default: {
     // For the architectures that have a single return instruction (such as
     //   RETQ on x86_64).
-    replaceRetWithPatchableRet(MF, TII);
+    InstrumentationOptions op;
+    op.HandleTailcall = true;
+    op.HandleAllReturns = false;
+    replaceRetWithPatchableRet(MF, TII, op);
     break;
   }
+  }
   return true;
 }