From 706b4fc47bbc608932d3b491ae19a3b9cde9497b Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 17 Jan 2020 20:45:01 +0000 Subject: Vendor import of llvm-project master e26a78e70, the last commit before the llvmorg-11-init tag, from which release/10.x was branched. --- llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 90 +- .../PowerPC/Disassembler/PPCDisassembler.cpp | 6 +- .../Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 8 +- .../Target/PowerPC/MCTargetDesc/PPCInstPrinter.h | 6 +- .../Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 28 +- .../lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 7 - .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 23 +- .../Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 24 + llvm/lib/Target/PowerPC/P9InstrResources.td | 229 ++-- llvm/lib/Target/PowerPC/PPC.h | 11 +- llvm/lib/Target/PowerPC/PPC.td | 175 ++- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 356 +++-- llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp | 1 + llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 4 +- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 7 +- llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 4 +- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 401 +++--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1357 +++++++++++++------- llvm/lib/Target/PowerPC/PPCISelLowering.h | 936 +++++++------- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 78 +- llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 121 +- llvm/lib/Target/PowerPC/PPCInstrFormats.td | 61 +- llvm/lib/Target/PowerPC/PPCInstrHTM.td | 16 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 549 +++++--- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 14 +- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 383 +++--- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 190 ++- llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 894 +++++++++++++ llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 605 --------- llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp | 164 +++ llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 390 ++++-- llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 2 +- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 25 +- llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp | 13 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 35 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 9 + llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 7 +- llvm/lib/Target/PowerPC/PPCSubtarget.h | 45 +- llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 1 + llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 20 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 96 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 17 +- llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 1 + .../PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 2 +- 44 files changed, 4573 insertions(+), 2838 deletions(-) create mode 100644 llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp delete mode 100644 llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp create mode 100644 llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index aedf5b713c3f..7e7902c27a81 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -800,9 +800,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } - case PPC::SUBICo: { + case PPC::SUBIC_rec: { MCInst TmpInst; - TmpInst.setOpcode(PPC::ADDICo); + TmpInst.setOpcode(PPC::ADDIC_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); addNegOperand(TmpInst,
Inst.getOperand(2), getContext()); @@ -810,11 +810,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::EXTLWI: - case PPC::EXTLWIo: { + case PPC::EXTLWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::EXTLWI ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(B)); @@ -824,11 +824,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::EXTRWI: - case PPC::EXTRWIo: { + case PPC::EXTRWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::EXTRWI ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(B + N)); @@ -838,11 +838,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::INSLWI: - case PPC::INSLWIo: { + case PPC::INSLWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.setOpcode(Opcode == PPC::INSLWI ? PPC::RLWIMI : PPC::RLWIMI_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); @@ -853,11 +853,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::INSRWI: - case PPC::INSRWIo: { + case PPC::INSRWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.setOpcode(Opcode == PPC::INSRWI ? PPC::RLWIMI : PPC::RLWIMI_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); @@ -868,10 +868,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::ROTRWI: - case PPC::ROTRWIo: { + case PPC::ROTRWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::ROTRWI ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(32 - N)); @@ -881,10 +881,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::SLWI: - case PPC::SLWIo: { + case PPC::SLWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::SLWI ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(N)); @@ -894,10 +894,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::SRWI: - case PPC::SRWIo: { + case PPC::SRWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::SRWI ? 
PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(32 - N)); @@ -907,10 +907,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::CLRRWI: - case PPC::CLRRWIo: { + case PPC::CLRRWI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::CLRRWI ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(0)); @@ -920,11 +920,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::CLRLSLWI: - case PPC::CLRLSLWIo: { + case PPC::CLRLSLWI_rec: { MCInst TmpInst; int64_t B = Inst.getOperand(2).getImm(); int64_t N = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::CLRLSLWI ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(N)); @@ -934,11 +934,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::EXTLDI: - case PPC::EXTLDIo: { + case PPC::EXTLDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo); + TmpInst.setOpcode(Opcode == PPC::EXTLDI ? PPC::RLDICR : PPC::RLDICR_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(B)); @@ -947,11 +947,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::EXTRDI: - case PPC::EXTRDIo: { + case PPC::EXTRDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo); + TmpInst.setOpcode(Opcode == PPC::EXTRDI ? PPC::RLDICL : PPC::RLDICL_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(B + N)); @@ -960,11 +960,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::INSRDI: - case PPC::INSRDIo: { + case PPC::INSRDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); int64_t B = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo); + TmpInst.setOpcode(Opcode == PPC::INSRDI ? PPC::RLDIMI : PPC::RLDIMI_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); @@ -974,10 +974,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::ROTRDI: - case PPC::ROTRDIo: { + case PPC::ROTRDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo); + TmpInst.setOpcode(Opcode == PPC::ROTRDI ? PPC::RLDICL : PPC::RLDICL_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(64 - N)); @@ -986,10 +986,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::SLDI: - case PPC::SLDIo: { + case PPC::SLDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo); + TmpInst.setOpcode(Opcode == PPC::SLDI ? 
PPC::RLDICR : PPC::RLDICR_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(N)); @@ -1007,10 +1007,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::SRDI: - case PPC::SRDIo: { + case PPC::SRDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo); + TmpInst.setOpcode(Opcode == PPC::SRDI ? PPC::RLDICL : PPC::RLDICL_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(64 - N)); @@ -1019,10 +1019,10 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::CLRRDI: - case PPC::CLRRDIo: { + case PPC::CLRRDI_rec: { MCInst TmpInst; int64_t N = Inst.getOperand(2).getImm(); - TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo); + TmpInst.setOpcode(Opcode == PPC::CLRRDI ? PPC::RLDICR : PPC::RLDICR_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(0)); @@ -1031,11 +1031,11 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::CLRLSLDI: - case PPC::CLRLSLDIo: { + case PPC::CLRLSLDI_rec: { MCInst TmpInst; int64_t B = Inst.getOperand(2).getImm(); int64_t N = Inst.getOperand(3).getImm(); - TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo); + TmpInst.setOpcode(Opcode == PPC::CLRLSLDI ? PPC::RLDIC : PPC::RLDIC_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(N)); @@ -1044,14 +1044,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::RLWINMbm: - case PPC::RLWINMobm: { + case PPC::RLWINMbm_rec: { unsigned MB, ME; int64_t BM = Inst.getOperand(3).getImm(); if (!isRunOfOnes(BM, MB, ME)) break; MCInst TmpInst; - TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo); + TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(Inst.getOperand(2)); @@ -1061,14 +1061,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::RLWIMIbm: - case PPC::RLWIMIobm: { + case PPC::RLWIMIbm_rec: { unsigned MB, ME; int64_t BM = Inst.getOperand(3).getImm(); if (!isRunOfOnes(BM, MB, ME)) break; MCInst TmpInst; - TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMI_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); // The tied operand. TmpInst.addOperand(Inst.getOperand(1)); @@ -1079,14 +1079,14 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::RLWNMbm: - case PPC::RLWNMobm: { + case PPC::RLWNMbm_rec: { unsigned MB, ME; int64_t BM = Inst.getOperand(3).getImm(); if (!isRunOfOnes(BM, MB, ME)) break; MCInst TmpInst; - TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo); + TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNM_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(Inst.getOperand(2)); @@ -1116,8 +1116,8 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, case PPC::CP_PASTEx : case PPC::CP_PASTE_LAST: { MCInst TmpInst; - TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ? - PPC::CP_PASTE : PPC::CP_PASTEo); + TmpInst.setOpcode(Opcode == PPC::CP_PASTEx ? 
PPC::CP_PASTE + : PPC::CP_PASTE_rec); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); TmpInst.addOperand(MCOperand::createImm(Opcode == PPC::CP_PASTEx ? 0 : 1)); @@ -1786,7 +1786,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { /// Force static initialization. -extern "C" void LLVMInitializePowerPCAsmParser() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() { RegisterMCAsmParser A(getThePPC32Target()); RegisterMCAsmParser B(getThePPC64Target()); RegisterMCAsmParser C(getThePPC64LETarget()); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 3597fd15eeb1..e3c0f958c7ed 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -34,7 +34,6 @@ public: DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, - raw_ostream &VStream, raw_ostream &CStream) const override; }; } // end anonymous namespace @@ -51,7 +50,7 @@ static MCDisassembler *createPPCLEDisassembler(const Target &T, return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true); } -extern "C" void LLVMInitializePowerPCDisassembler() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCDisassembler() { // Register the disassembler for each target. TargetRegistry::RegisterMCDisassembler(getThePPC32Target(), createPPCDisassembler); @@ -323,7 +322,7 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef Bytes, - uint64_t Address, raw_ostream &OS, + uint64_t Address, raw_ostream &CS) const { // Get the four bytes of the instruction. Size = 4; @@ -350,4 +349,3 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); } - diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 7fc231618fa9..9cc1c539e24a 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -64,8 +64,9 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << RegName; } -void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot, const MCSubtargetInfo &STI) { +void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &O) { // Customize printing of the addis instruction on AIX. 
When an operand is a // symbol reference, the instruction syntax is changed to look like a load // operation, i.e: @@ -193,11 +194,10 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } if (!printAliasInstr(MI, O)) - printInstruction(MI, O); + printInstruction(MI, Address, O); printAnnotation(O, Annot); } - void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 725ae2a7081b..a3ec41aa348d 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -32,11 +32,11 @@ public: : MCInstPrinter(MAI, MII, MRI), TT(T) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, - const MCSubtargetInfo &STI) override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override; // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); bool printAliasInstr(const MCInst *MI, raw_ostream &OS); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 1216cd727289..dc2c216a3efd 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -15,33 +15,6 @@ using namespace llvm; -void PPCMCAsmInfoDarwin::anchor() { } - -PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { - if (is64Bit) { - CodePointerSize = CalleeSaveStackSlotSize = 8; - } - IsLittleEndian = false; - - SeparatorString = "@"; - CommentString = ";"; - ExceptionsType = ExceptionHandling::DwarfCFI; - - if (!is64Bit) - Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode. - - AssemblerDialect = 1; // New-Style mnemonics. - SupportsDebugInformation= true; // Debug information. - - // The installed assembler for OSX < 10.6 lacks some directives. - // FIXME: this should really be a check on the assembler characteristics - // rather than OS version - if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) - HasWeakDefCanBeHiddenDirective = false; - - UseIntegratedAssembler = true; -} - void PPCELFMCAsmInfo::anchor() { } PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { @@ -87,4 +60,5 @@ PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { assert(!IsLittleEndian && "Little-endian XCOFF not supported."); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 
8 : 4; ZeroDirective = "\t.space\t"; + SymbolsHaveSMC = true; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 42cb62ad26a4..8c52bbbd8a56 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -20,13 +20,6 @@ namespace llvm { class Triple; -class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); - -public: - explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &); -}; - class PPCELFMCAsmInfo : public MCAsmInfoELF { void anchor() override; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 90c3c8d20edb..cbfb8e2ff282 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" @@ -76,14 +77,13 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT, } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, - const Triple &TheTriple) { + const Triple &TheTriple, + const MCTargetOptions &Options) { bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); MCAsmInfo *MAI; - if (TheTriple.isOSDarwin()) - MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); - else if (TheTriple.isOSBinFormatXCOFF()) + if (TheTriple.isOSBinFormatXCOFF()) MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple); else MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple); @@ -107,8 +107,11 @@ public: : PPCTargetStreamer(S), OS(OS) {} void emitTCEntry(const MCSymbol &S) override { + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); OS << "\t.tc "; - OS << S.getName(); + OS << (MAI->getSymbolsHaveSMC() + ? cast(S).getUnqualifiedName() + : S.getName()); OS << "[TC],"; OS << S.getName(); OS << '\n'; @@ -196,7 +199,8 @@ public: void finish() override { for (auto *Sym : UpdateOther) - copyLocalEntry(Sym, Sym->getVariableValue()); + if (Sym->isVariable()) + copyLocalEntry(Sym, Sym->getVariableValue()); } private: @@ -242,7 +246,10 @@ public: PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} void emitTCEntry(const MCSymbol &S) override { - report_fatal_error("TOC entries not supported yet."); + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); + const unsigned PointerSize = MAI->getCodePointerSize(); + Streamer.EmitValueToAlignment(PointerSize); + Streamer.EmitSymbolValue(&S, PointerSize); } void emitMachine(StringRef CPU) override { @@ -285,7 +292,7 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, return new PPCInstPrinter(MAI, MII, MRI, T); } -extern "C" void LLVMInitializePowerPCTargetMC() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetMC() { for (Target *T : {&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) { // Register the MC asm info. 
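As context for the PPCAsmParser.cpp hunks above: each case rewrites an extended mnemonic (extlwi, extrwi, slwi, srwi, clrrwi, and their 64-bit counterparts) into a rotate-and-mask instruction, and the renamed `_rec` opcodes are simply the record forms (Rc=1) of the same rewrites. Below is a minimal standalone sketch of the operand arithmetic two of those cases perform, following the Power ISA extended-mnemonic definitions; the function names are illustrative, and the real code builds MCInst operands rather than printing:

```cpp
#include <cstdint>
#include <cstdio>

struct RLWINMOperands {
  int64_t SH, MB, ME; // rotate amount, mask begin, mask end
};

// Mirrors the EXTRWI/EXTRWI_rec case above:
//   extrwi rD,rS,n,b -> rlwinm rD,rS,b+n,32-n,31
static RLWINMOperands expandEXTRWI(int64_t N, int64_t B) {
  return {B + N, 32 - N, 31};
}

// Mirrors the SLWI/SLWI_rec case above:
//   slwi rD,rS,n -> rlwinm rD,rS,n,0,31-n
static RLWINMOperands expandSLWI(int64_t N) {
  return {N, 0, 31 - N};
}

int main() {
  RLWINMOperands E = expandEXTRWI(8, 4); // extrwi rD,rS,8,4
  RLWINMOperands S = expandSLWI(2);      // slwi rD,rS,2
  std::printf("extrwi 8,4 -> rlwinm SH=%lld MB=%lld ME=%lld\n",
              (long long)E.SH, (long long)E.MB, (long long)E.ME);
  std::printf("slwi 2     -> rlwinm SH=%lld MB=%lld ME=%lld\n",
              (long long)S.SH, (long long)S.MB, (long long)S.ME);
}
```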
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 74b67bd2e928..49443679bb31 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -82,6 +82,30 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { return false; } +static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { + if (!Val) + return false; + + if (isShiftedMask_64(Val)) { + // look for the first non-zero bit + MB = countLeadingZeros(Val); + // look for the first zero bit after the run of ones + ME = countLeadingZeros((Val - 1) ^ Val); + return true; + } else { + Val = ~Val; // invert mask + if (isShiftedMask_64(Val)) { + // effectively look for the first zero bit + ME = countLeadingZeros(Val) - 1; + // effectively look for the first one bit after the run of zeros + MB = countLeadingZeros((Val - 1) ^ Val) + 1; + return true; + } + } + // no run present + return false; +} + } // end namespace llvm // Generated files will use "namespace PPC". To avoid symbol clash, diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index f6cd8ed00c82..9b3d13989ee2 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -107,7 +107,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C], (instregex "XSMAX(C|J)?DP$"), (instregex "XSMIN(C|J)?DP$"), (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"), - (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"), + (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"), (instregex "POPCNT(D|W)$"), (instregex "CMPB(8)?$"), (instregex "SETB(8)?$"), @@ -129,26 +129,26 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "MTV(S)?RW(A|Z)$"), (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"), - (instregex "SUBF(I)?C(8)?$"), - (instregex "ANDI(S)?o(8)?$"), - (instregex "ADDC(8)?$"), - (instregex "ADDIC(8)?(o)?$"), - (instregex "ADD(8|4)(o)?$"), - (instregex "ADD(E|ME|ZE)(8)?(o)?$"), - (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"), - (instregex "NEG(8)?(o)?$"), + (instregex "SUBF(I)?C(8)?(O)?$"), + (instregex "ANDI(S)?(8)?(_rec)?$"), + (instregex "ADDC(8)?(O)?$"), + (instregex "ADDIC(8)?(_rec)?$"), + (instregex "ADD(8|4)(O)?(_rec)?$"), + (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"), + (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"), + (instregex "NEG(8)?(O)?(_rec)?$"), (instregex "POPCNTB$"), (instregex "ADD(I|IS)?(8)?$"), (instregex "LI(S)?(8)?$"), - (instregex "(X)?OR(I|IS)?(8)?(o)?$"), - (instregex "NAND(8)?(o)?$"), - (instregex "AND(C)?(8)?(o)?$"), - (instregex "NOR(8)?(o)?$"), - (instregex "OR(C)?(8)?(o)?$"), - (instregex "EQV(8)?(o)?$"), - (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"), + (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"), + (instregex "NAND(8)?(_rec)?$"), + (instregex "AND(C)?(8)?(_rec)?$"), + (instregex "NOR(8)?(_rec)?$"), + (instregex "OR(C)?(8)?(_rec)?$"), + (instregex "EQV(8)?(_rec)?$"), + (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"), (instregex "ADD(4|8)(TLS)?(_)?$"), - (instregex "NEG(8)?$"), + (instregex "NEG(8)?(O)?$"), (instregex "ADDI(S)?toc(HA|L)(8)?$"), COPY, MCRF, @@ -211,8 +211,8 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instregex "VABSDU(B|H|W)$"), (instregex "VADDU(B|H|W)S$"), (instregex "VAVG(S|U)(B|H|W)$"), - (instregex "VCMP(EQ|GE|GT)FP(o)?$"), - (instregex "VCMPBFP(o)?$"), + (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"), + (instregex "VCMPBFP(_rec)?$"), (instregex 
"VC(L|T)Z(B|H|W|D)$"), (instregex "VADDS(B|H|W)S$"), (instregex "V(MIN|MAX)FP$"), @@ -233,43 +233,43 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], VSUBUWS, VSUBCUW, VCMPGTSB, - VCMPGTSBo, + VCMPGTSB_rec, VCMPGTSD, - VCMPGTSDo, + VCMPGTSD_rec, VCMPGTSH, - VCMPGTSHo, + VCMPGTSH_rec, VCMPGTSW, - VCMPGTSWo, + VCMPGTSW_rec, VCMPGTUB, - VCMPGTUBo, + VCMPGTUB_rec, VCMPGTUD, - VCMPGTUDo, + VCMPGTUD_rec, VCMPGTUH, - VCMPGTUHo, + VCMPGTUH_rec, VCMPGTUW, - VCMPGTUWo, - VCMPNEBo, - VCMPNEHo, - VCMPNEWo, - VCMPNEZBo, - VCMPNEZHo, - VCMPNEZWo, - VCMPEQUBo, - VCMPEQUDo, - VCMPEQUHo, - VCMPEQUWo, + VCMPGTUW_rec, + VCMPNEB_rec, + VCMPNEH_rec, + VCMPNEW_rec, + VCMPNEZB_rec, + VCMPNEZH_rec, + VCMPNEZW_rec, + VCMPEQUB_rec, + VCMPEQUD_rec, + VCMPEQUH_rec, + VCMPEQUW_rec, XVCMPEQDP, - XVCMPEQDPo, + XVCMPEQDP_rec, XVCMPEQSP, - XVCMPEQSPo, + XVCMPEQSP_rec, XVCMPGEDP, - XVCMPGEDPo, + XVCMPGEDP_rec, XVCMPGESP, - XVCMPGESPo, + XVCMPGESP_rec, XVCMPGTDP, - XVCMPGTDPo, + XVCMPGTDP_rec, XVCMPGTSP, - XVCMPGTSPo, + XVCMPGTSP_rec, XVMAXDP, XVMAXSP, XVMINDP, @@ -399,7 +399,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C], (instrs (instregex "MADD(HD|HDU|LD|LD8)$"), - (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$") + (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$") )>; // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three @@ -451,14 +451,14 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C], def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - (instregex "FSEL(D|S)o$") + (instregex "FSEL(D|S)_rec$") )>; // 5 Cycle Restricted DP operation and one 2 cycle ALU operation. def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - (instregex "MUL(H|L)(D|W)(U)?o$") + (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$") )>; // 7 cycle Restricted DP operation and one 3 cycle ALU operation. @@ -467,18 +467,18 @@ def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - (instregex "FRI(N|P|Z|M)(D|S)o$"), - (instregex "FRE(S)?o$"), - (instregex "FADD(S)?o$"), - (instregex "FSUB(S)?o$"), - (instregex "F(N)?MSUB(S)?o$"), - (instregex "F(N)?MADD(S)?o$"), - (instregex "FCFID(U)?(S)?o$"), - (instregex "FCTID(U)?(Z)?o$"), - (instregex "FCTIW(U)?(Z)?o$"), - (instregex "FMUL(S)?o$"), - (instregex "FRSQRTE(S)?o$"), - FRSPo + (instregex "FRI(N|P|Z|M)(D|S)_rec$"), + (instregex "FRE(S)?_rec$"), + (instregex "FADD(S)?_rec$"), + (instregex "FSUB(S)?_rec$"), + (instregex "F(N)?MSUB(S)?_rec$"), + (instregex "F(N)?MADD(S)?_rec$"), + (instregex "FCFID(U)?(S)?_rec$"), + (instregex "FCTID(U)?(Z)?_rec$"), + (instregex "FCTIW(U)?(Z)?_rec$"), + (instregex "FMUL(S)?_rec$"), + (instregex "FRSQRTE(S)?_rec$"), + FRSP_rec )>; // 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units. @@ -613,16 +613,16 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], XSCMPUQP, XSTSTDCQP, XSXSIGQP, - BCDCFNo, - BCDCFZo, - BCDCPSGNo, - BCDCTNo, - BCDCTZo, - BCDSETSGNo, - BCDSo, - BCDTRUNCo, - BCDUSo, - BCDUTRUNCo + BCDCFN_rec, + BCDCFZ_rec, + BCDCPSGN_rec, + BCDCTN_rec, + BCDCTZ_rec, + BCDSETSGN_rec, + BCDS_rec, + BCDTRUNC_rec, + BCDUS_rec, + BCDUTRUNC_rec )>; // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole @@ -630,7 +630,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], // dispatch. 
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs - BCDSRo, + BCDSR_rec, XSADDQP, XSADDQPO, XSCVDPQP, @@ -654,7 +654,7 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], // dispatch. def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs - BCDCTSQo + BCDCTSQ_rec )>; // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole @@ -679,7 +679,7 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], // dispatch. def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], (instrs - BCDCFSQo + BCDCFSQ_rec )>; // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole @@ -819,7 +819,7 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C], (instrs (instregex "LHA(X)?(8)?$"), - (instregex "CP_PASTE(8)?o$"), + (instregex "CP_PASTE(8)?_rec$"), (instregex "LWA(X)?(_32)?$"), TCHECK )>; @@ -946,7 +946,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVW, + DIVWO, DIVWU, + DIVWUO, MODSW )>; @@ -956,9 +958,13 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVWE, + DIVWEO, DIVD, + DIVDO, DIVWEU, + DIVWEUO, DIVDU, + DIVDUO, MODSD, MODUD, MODUW @@ -970,7 +976,9 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], (instrs DIVDE, - DIVDEU + DIVDEO, + DIVDEU, + DIVDEUO )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation @@ -979,7 +987,7 @@ def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_EVEN_1C, DISP_1C], (instrs - (instregex "DIVW(U)?(O)?o$") + (instregex "DIVW(U)?(O)?_rec$") )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation @@ -988,10 +996,14 @@ def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_EVEN_1C, DISP_1C], (instrs - DIVDo, - DIVDUo, - DIVWEo, - DIVWEUo + DIVD_rec, + DIVDO_rec, + DIVDU_rec, + DIVDUO_rec, + DIVWE_rec, + DIVWEO_rec, + DIVWEU_rec, + DIVWEUO_rec )>; // Cracked DIV and ALU operation. Requires one full slice for the ALU operation @@ -1000,8 +1012,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, DISP_EVEN_1C, DISP_1C], (instrs - DIVDEo, - DIVDEUo + DIVDE_rec, + DIVDEO_rec, + DIVDEU_rec, + DIVDEUO_rec )>; // CR access instructions in _BrMCR, IIC_BrMCRX. @@ -1026,8 +1040,8 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs - (instregex "ADDC(8)?o$"), - (instregex "SUBFC(8)?o$") + (instregex "ADDC(8)?(O)?_rec$"), + (instregex "SUBFC(8)?(O)?_rec$") )>; // Cracked ALU operations. @@ -1038,10 +1052,10 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - (instregex "F(N)?ABS(D|S)o$"), - (instregex "FCPSGN(D|S)o$"), - (instregex "FNEG(D|S)o$"), - FMRo + (instregex "F(N)?ABS(D|S)_rec$"), + (instregex "FCPSGN(D|S)_rec$"), + (instregex "FNEG(D|S)_rec$"), + FMR_rec )>; // Cracked ALU operations. 
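The isRunOfOnes64 helper added to PPCMCTargetDesc.h earlier in this patch returns the big-endian bit indices (0 = most significant bit) delimiting a contiguous run of ones, handling masks whose ones wrap around the word via the inverted case. The following is a standalone sketch of the same logic, substituting GCC/Clang builtins for LLVM's MathExtras helpers (an assumption of this sketch, not how the patch is written):

```cpp
#include <cstdint>
#include <cstdio>

static bool isMask64(uint64_t V) { return V && (((V + 1) & V) == 0); }
static bool isShiftedMask64(uint64_t V) { return V && isMask64((V - 1) | V); }
static unsigned clz64(uint64_t V) { return V ? __builtin_clzll(V) : 64; }

// On success, MB/ME hold the big-endian indices of the first and last one
// bit of the run; a wrap-around mask is detected by finding the contiguous
// run of zeros in the inverted value.
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
  if (!Val)
    return false;
  if (isShiftedMask64(Val)) {
    MB = clz64(Val);             // first one bit
    ME = clz64((Val - 1) ^ Val); // last one bit
    return true;
  }
  Val = ~Val; // wrap-around case: the zeros form a contiguous run
  if (isShiftedMask64(Val)) {
    ME = clz64(Val) - 1;             // one bit just above the zero run
    MB = clz64((Val - 1) ^ Val) + 1; // one bit just below the zero run
    return true;
  }
  return false;
}

int main() {
  unsigned MB, ME;
  if (isRunOfOnes64(0x00FFFF0000000000ULL, MB, ME))
    std::printf("contiguous: MB=%u ME=%u\n", MB, ME); // MB=8 ME=23
  if (isRunOfOnes64(0xF00000000000000FULL, MB, ME))
    std::printf("wrapped:    MB=%u ME=%u\n", MB, ME); // MB=60 ME=3
}
```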
@@ -1063,8 +1077,8 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs - (instregex "MTFSF(b|o)?$"), - (instregex "MTFSFI(o)?$") + (instregex "MTFSF(b|_rec)?$"), + (instregex "MTFSFI(_rec)?$") )>; // Cracked instruction made of two ALU ops. @@ -1073,13 +1087,13 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - (instregex "RLD(I)?C(R|L)o$"), - (instregex "RLW(IMI|INM|NM)(8)?o$"), - (instregex "SLW(8)?o$"), - (instregex "SRAW(I)?o$"), - (instregex "SRW(8)?o$"), - RLDICL_32o, - RLDIMIo + (instregex "RLD(I)?C(R|L)_rec$"), + (instregex "RLW(IMI|INM|NM)(8)?_rec$"), + (instregex "SLW(8)?_rec$"), + (instregex "SRAW(I)?_rec$"), + (instregex "SRW(8)?_rec$"), + RLDICL_32_rec, + RLDIMI_rec )>; // Cracked instruction made of two ALU ops. @@ -1088,7 +1102,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C], (instrs - (instregex "MFFS(L|CE|o)?$") + (instregex "MFFS(L|CE|_rec)?$") )>; // Cracked ALU instruction composed of three consecutive 2 cycle loads for a @@ -1104,12 +1118,12 @@ def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, // The two ops cannot be done in parallel. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs - (instregex "EXTSWSLI_32_64o$"), - (instregex "SRAD(I)?o$"), - EXTSWSLIo, - SLDo, - SRDo, - RLDICo + (instregex "EXTSWSLI_32_64_rec$"), + (instregex "SRAD(I)?_rec$"), + EXTSWSLI_rec, + SLD_rec, + SRD_rec, + RLDIC_rec )>; // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. @@ -1122,7 +1136,7 @@ def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - FDIVo + FDIV_rec )>; // 36 Cycle DP Instruction. @@ -1156,7 +1170,7 @@ def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - FSQRTo + FSQRT_rec )>; // 26 Cycle DP Instruction. @@ -1175,7 +1189,7 @@ def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - FSQRTSo + FSQRTS_rec )>; // 33 Cycle DP Instruction. Takes one slice and 1 dispatch. @@ -1194,7 +1208,7 @@ def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C], (instrs - FDIVSo + FDIVS_rec )>; // 22 Cycle DP Instruction. Takes one slice and 1 dispatch. 
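The P9InstrResources.td changes above are mechanical renames: the trailing `o` on record-form opcodes becomes `_rec`, and overflow variants are spelled with a trailing `O`, so the instregex patterns gain `(O)?` and `(_rec)?` groups. A quick standalone check of one renamed pattern, assuming instregex behaves like an ordinary anchored regular expression over opcode names:

```cpp
#include <cstdio>
#include <regex>

int main() {
  // One of the renamed patterns from the 2-cycle ALU class above.
  std::regex Pat("^ADD(8|4)(O)?(_rec)?$");
  for (const char *Op :
       {"ADD4", "ADD8O", "ADD8_rec", "ADD8O_rec", "ADD8o" /* old name */})
    std::printf("%-9s %s\n", Op,
                std::regex_match(Op, Pat) ? "matches" : "no match");
}
```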
@@ -1304,6 +1318,7 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C], BCLalways, BCLn, BCTRL8_LDinto_toc, + BCTRL_LWZinto_toc, BCn, CTRL_DEP )>; @@ -1400,7 +1415,7 @@ def : InstRW<[], MBAR, MSYNC, SLBSYNC, - SLBFEEo, + SLBFEE_rec, NAP, STOP, TRAP, diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 0534773c4c9e..a83509f0e687 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -28,12 +28,13 @@ namespace llvm { class AsmPrinter; class MCInst; class MCOperand; - + class ModulePass; + FunctionPass *createPPCCTRLoops(); #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif - FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM); + FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM); FunctionPass *createPPCTOCRegDepsPass(); FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCVSXCopyPass(); @@ -59,7 +60,7 @@ namespace llvm { #ifndef NDEBUG void initializePPCCTRLoopsVerifyPass(PassRegistry&); #endif - void initializePPCLoopPreIncPrepPass(PassRegistry&); + void initializePPCLoopInstrFormPrepPass(PassRegistry&); void initializePPCTOCRegDepsPass(PassRegistry&); void initializePPCEarlyReturnPass(PassRegistry&); void initializePPCVSXCopyPass(PassRegistry&); @@ -77,6 +78,10 @@ namespace llvm { extern char &PPCVSXFMAMutateID; + ModulePass *createPPCLowerMASSVEntriesPass(); + void initializePPCLowerMASSVEntriesPass(PassRegistry &); + extern char &PPCLowerMASSVEntriesID; + namespace PPCII { /// Target Operand Flag enum. diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 8e94a2ae15e0..bef0a81ee3ad 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -22,35 +22,37 @@ include "llvm/Target/Target.td" // CPU Directives // //===----------------------------------------------------------------------===// -def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">; -def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">; -def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">; -def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; -def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; -def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; -def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">; -def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; -def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; -def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; -def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; -def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; -def DirectiveE500 : SubtargetFeature<"", "DarwinDirective", +def Directive440 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_440", "">; +def Directive601 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_601", "">; +def Directive602 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_602", "">; +def Directive603 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">; +def Directive604 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">; +def Directive620 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_603", "">; +def Directive7400: SubtargetFeature<"", "CPUDirective", "PPC::DIR_7400", "">; +def Directive750 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_750", "">; +def Directive970 : 
SubtargetFeature<"", "CPUDirective", "PPC::DIR_970", "">; +def Directive32 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_32", "">; +def Directive64 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_64", "">; +def DirectiveA2 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_A2", "">; +def DirectiveE500 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_E500", "">; -def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", +def DirectiveE500mc : SubtargetFeature<"", "CPUDirective", "PPC::DIR_E500mc", "">; -def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", +def DirectiveE5500 : SubtargetFeature<"", "CPUDirective", "PPC::DIR_E5500", "">; -def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; -def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; -def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; +def DirectivePwr3: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR3", "">; +def DirectivePwr4: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR4", "">; +def DirectivePwr5: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR5", "">; def DirectivePwr5x - : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; -def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; + : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR5X", "">; +def DirectivePwr6: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR6", "">; def DirectivePwr6x - : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; -def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; -def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; -def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">; + : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR6X", "">; +def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">; +def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">; +def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">; +def DirectivePwrFuture + : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -164,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", "Enable Hardware Transactional Memory instructions">; def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", "Implement mftb using the mfspr instruction">; +def FeatureUnalignedFloats : + SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", + "true", "CPU does not trap on unaligned FP access">; def FeaturePPCPreRASched: SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true", "Use PowerPC pre-RA scheduling strategy">; @@ -209,36 +214,95 @@ def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units", // came before them, the idea is to make implementations of new processors // less error prone and easier to read. // Namely: -// list Power8FeatureList = ... -// list FutureProcessorSpecificFeatureList = -// [ features that Power8 does not support ] -// list FutureProcessorFeatureList = -// !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) +// list P8InheritableFeatures = ... 
+// list FutureProcessorAddtionalFeatures = +// [ features that Power8 does not support but inheritable ] +// list FutureProcessorSpecificFeatures = +// [ features that Power8 does not support and not inheritable ] +// list FutureProcessorInheritableFeatures = +// !listconcat(P8InheritableFeatures, FutureProcessorAddtionalFeatures) +// list FutureProcessorFeatures = +// !listconcat(FutureProcessorInheritableFeatures, +// FutureProcessorSpecificFeatures) // Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as // well as providing a single point of definition if the feature set will be // used elsewhere. def ProcessorFeatures { - list Power7FeatureList = - [DirectivePwr7, FeatureAltivec, FeatureVSX, - FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, - FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, - FeatureBPERMD, FeatureExtDiv, - FeatureMFTB, DeprecatedDST, FeatureTwoConstNR]; - list Power8SpecificFeatures = - [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, - FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; - list Power8FeatureList = - !listconcat(Power7FeatureList, Power8SpecificFeatures); - list Power9SpecificFeatures = - [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0, - FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched]; - list Power9FeatureList = - !listconcat(Power8FeatureList, Power9SpecificFeatures); + // Power7 + list P7InheritableFeatures = [DirectivePwr7, + FeatureAltivec, + FeatureVSX, + FeatureMFOCRF, + FeatureFCPSGN, + FeatureFSqrt, + FeatureFRE, + FeatureFRES, + FeatureFRSQRTE, + FeatureFRSQRTES, + FeatureRecipPrec, + FeatureSTFIWX, + FeatureLFIWAX, + FeatureFPRND, + FeatureFPCVT, + FeatureISEL, + FeaturePOPCNTD, + FeatureCMPB, + FeatureLDBRX, + Feature64Bit, + /* Feature64BitRegs, */ + FeatureBPERMD, + FeatureExtDiv, + FeatureMFTB, + DeprecatedDST, + FeatureTwoConstNR, + FeatureUnalignedFloats]; + list P7SpecificFeatures = []; + list P7Features = + !listconcat(P7InheritableFeatures, P7SpecificFeatures); + + // Power8 + list P8AdditionalFeatures = [DirectivePwr8, + FeatureP8Altivec, + FeatureP8Vector, + FeatureP8Crypto, + FeatureHTM, + FeatureDirectMove, + FeatureICBT, + FeaturePartwordAtomic]; + list P8SpecificFeatures = []; + list P8InheritableFeatures = + !listconcat(P7InheritableFeatures, P8AdditionalFeatures); + list P8Features = + !listconcat(P8InheritableFeatures, P8SpecificFeatures); + + // Power9 + list P9AdditionalFeatures = [DirectivePwr9, + FeatureP9Altivec, + FeatureP9Vector, + FeatureISA3_0]; + // Some features are unique to Power9 and there is no reason to assume + // they will be part of any future CPUs. One example is the narrower + // dispatch for vector operations than scalar ones. For the time being, + // this list also includes scheduling-related features since we do not have + // enough info to create custom scheduling strategies for future CPUs. 
+ list P9SpecificFeatures = [FeatureVectorsUseTwoUnits, + FeaturePPCPreRASched, + FeaturePPCPostRASched]; + list P9InheritableFeatures = + !listconcat(P8InheritableFeatures, P9AdditionalFeatures); + list P9Features = + !listconcat(P9InheritableFeatures, P9SpecificFeatures); + + // Future + // For future CPU we assume that all of the existing features from Power 9 + // still exist with the exception of those we know are Power 9 specific. + list FutureAdditionalFeatures = []; + list FutureSpecificFeatures = []; + list FutureInheritableFeatures = + !listconcat(P9InheritableFeatures, FutureAdditionalFeatures); + list FutureFeatures = + !listconcat(FutureInheritableFeatures, FutureSpecificFeatures); } // Note: Future features to add when support is extended to more @@ -378,7 +442,7 @@ def : ProcessorModel<"g5", G5Model, def : ProcessorModel<"e500", PPCE500Model, [DirectiveE500, FeatureICBT, FeatureBookE, - FeatureISEL, FeatureMFTB]>; + FeatureISEL, FeatureMFTB, FeatureSPE]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureSTFIWX, FeatureICBT, FeatureBookE, @@ -438,9 +502,12 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, FeatureMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; -def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; -def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; +def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; +def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; +// No scheduler model for future CPU. +def : ProcessorModel<"future", NoSchedModel, + ProcessorFeatures.FutureFeatures>; def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, FeatureMFTB]>; def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, @@ -451,7 +518,7 @@ def : ProcessorModel<"ppc64", G5Model, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; -def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.Power8FeatureList>; +def : ProcessorModel<"ppc64le", P8Model, ProcessorFeatures.P8Features>; //===----------------------------------------------------------------------===// // Calling Conventions diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 66236b72a1a3..4311df5dbeb8 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -79,9 +80,11 @@ namespace { class PPCAsmPrinter : public AsmPrinter { protected: MapVector TOC; - const PPCSubtarget *Subtarget; + const PPCSubtarget *Subtarget = nullptr; StackMaps SM; + virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO); + public: explicit PPCAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -144,23 +147,12 @@ public: void EmitInstruction(const MachineInstr *MI) override; }; -/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac -/// OS X -class PPCDarwinAsmPrinter : public PPCAsmPrinter { -public: - explicit PPCDarwinAsmPrinter(TargetMachine &TM, - std::unique_ptr Streamer) - : PPCAsmPrinter(TM, 
std::move(Streamer)) {} - - StringRef getPassName() const override { - return "Darwin PPC Assembly Printer"; - } - - bool doFinalization(Module &M) override; - void EmitStartOfAsmFile(Module &M) override; -}; - class PPCAIXAsmPrinter : public PPCAsmPrinter { +private: + static void ValidateGV(const GlobalVariable *GV); +protected: + MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override; + public: PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) {} @@ -169,6 +161,8 @@ public: void SetupMachineFunction(MachineFunction &MF) override; + const MCExpr *lowerConstant(const Constant *CV) override; + void EmitGlobalVariable(const GlobalVariable *GV) override; void EmitFunctionDescriptor() override; @@ -351,8 +345,12 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) { unsigned NumNOPBytes = MI.getOperand(1).getImm(); + + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->EmitLabel(MILabel); - SM.recordStackMap(MI); + SM.recordStackMap(*MILabel, MI); assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); // Scan ahead to trim the shadow. @@ -377,7 +375,11 @@ void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) { // Lower a patchpoint of the form: // [], , , , void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { - SM.recordPatchPoint(MI); + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->EmitLabel(MILabel); + + SM.recordPatchPoint(*MILabel, MI); PatchPointOpers Opers(&MI); unsigned EncodedBytes = 0; @@ -490,9 +492,9 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, (!Subtarget->isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) && "GETtls[ld]ADDR[32] must read GPR3"); - if (!Subtarget->isPPC64() && !Subtarget->isDarwin() && - isPositionIndependent()) + if (Subtarget->is32BitELFABI() && isPositionIndependent()) Kind = MCSymbolRefExpr::VK_PLT; + const MCExpr *TlsRef = MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext); @@ -514,17 +516,16 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, /// Map a machine operand for a TOC pseudo-machine instruction to its /// corresponding MCSymbol. -static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, - AsmPrinter &AP) { +MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_GlobalAddress: - return AP.getSymbol(MO.getGlobal()); + return getSymbol(MO.getGlobal()); case MachineOperand::MO_ConstantPoolIndex: - return AP.GetCPISymbol(MO.getIndex()); + return GetCPISymbol(MO.getIndex()); case MachineOperand::MO_JumpTableIndex: - return AP.GetJTISymbol(MO.getIndex()); + return GetJTISymbol(MO.getIndex()); case MachineOperand::MO_BlockAddress: - return AP.GetBlockAddressSymbol(MO.getBlockAddress()); + return GetBlockAddressSymbol(MO.getBlockAddress()); default: llvm_unreachable("Unexpected operand type to get symbol."); } @@ -688,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtoc."); // Map the operand to its corresponding MCSymbol. - const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Create a reference to the GOT entry for the symbol. The GOT entry will be // synthesized later. 
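One structural change in PPCAsmPrinter.cpp above: getMCSymbolForTOCPseudoMO was a file-local static helper taking the printer as a parameter, and is now a protected virtual member so that PPCAIXAsmPrinter can override it to hand back csect symbols. A minimal sketch of that refactoring pattern follows; the class and member names are illustrative, not the real LLVM types:

```cpp
#include <cstdio>

// Before: static const char *symbolFor(bool IsGlobal, BasePrinter &AP);
// After: a virtual member the subclass can specialize.
struct BasePrinter {
  virtual ~BasePrinter() = default;
  virtual const char *symbolForTOCOperand(bool IsGlobal) {
    return "plain-symbol";
  }
  void emitTOCLoad(bool IsGlobal) {
    std::printf("ld r, %s@toc\n", symbolForTOCOperand(IsGlobal));
  }
};

struct AIXPrinter : BasePrinter {
  // The AIX override resolves global operands to a csect qualname symbol.
  const char *symbolForTOCOperand(bool IsGlobal) override {
    return IsGlobal ? "csect-qualname-symbol"
                    : BasePrinter::symbolForTOCOperand(IsGlobal);
  }
};

int main() {
  AIXPrinter P;
  P.emitTOCLoad(true);  // ld r, csect-qualname-symbol@toc
  P.emitTOCLoad(false); // ld r, plain-symbol@toc
}
```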
@@ -749,7 +750,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // global address operand to be a reference to the TOC entry we will // synthesize later. MCSymbol *TOCEntry = - lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO)); const MCSymbolRefExpr::VariantKind VK = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; @@ -775,7 +776,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for ADDIStocHA."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -805,7 +806,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtocL."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -835,7 +836,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for ADDIStocHA8!"); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); const bool GlobalToc = MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); @@ -881,7 +882,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "LDtocL used on symbol that could be accessed directly is " "invalid. Must match ADDIStocHA8.")); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -911,7 +912,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO), MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -1578,151 +1579,6 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() { } } -void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { - static const char *const CPUDirectives[] = { - "", - "ppc", - "ppc440", - "ppc601", - "ppc602", - "ppc603", - "ppc7400", - "ppc750", - "ppc970", - "ppcA2", - "ppce500", - "ppce500mc", - "ppce5500", - "power3", - "power4", - "power5", - "power5x", - "power6", - "power6x", - "power7", - // FIXME: why is power8 missing here? - "ppc64", - "ppc64le", - "power9" - }; - - // Get the numerically largest directive. - // FIXME: How should we merge darwin directives? - unsigned Directive = PPC::DIR_NONE; - for (const Function &F : M) { - const PPCSubtarget &STI = TM.getSubtarget(F); - unsigned FDir = STI.getDarwinDirective(); - Directive = Directive > FDir ? 
FDir : STI.getDarwinDirective(); - if (STI.hasMFOCRF() && Directive < PPC::DIR_970) - Directive = PPC::DIR_970; - if (STI.hasAltivec() && Directive < PPC::DIR_7400) - Directive = PPC::DIR_7400; - if (STI.isPPC64() && Directive < PPC::DIR_64) - Directive = PPC::DIR_64; - } - - assert(Directive <= PPC::DIR_64 && "Directive out of range."); - - assert(Directive < array_lengthof(CPUDirectives) && - "CPUDirectives[] might not be up-to-date!"); - PPCTargetStreamer &TStreamer = - *static_cast(OutStreamer->getTargetStreamer()); - TStreamer.emitMachine(CPUDirectives[Directive]); - - // Prime text sections so they are adjacent. This reduces the likelihood a - // large data or debug section causes a branch to exceed 16M limit. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); - OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection()); - if (TM.getRelocationModel() == Reloc::PIC_) { - OutStreamer->SwitchSection( - OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MachO::S_SYMBOL_STUBS | - MachO::S_ATTR_PURE_INSTRUCTIONS, - 32, SectionKind::getText())); - } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { - OutStreamer->SwitchSection( - OutContext.getMachOSection("__TEXT","__symbol_stub1", - MachO::S_SYMBOL_STUBS | - MachO::S_ATTR_PURE_INSTRUCTIONS, - 16, SectionKind::getText())); - } - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); -} - -bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; - - // Darwin/PPC always uses mach-o. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); - if (MMI) { - MachineModuleInfoMachO &MMIMacho = - MMI->getObjFileInfo(); - - if (MAI->doesSupportExceptionHandling()) { - // Add the (possibly multiple) personalities to the set of global values. - // Only referenced functions get into the Personalities list. - for (const Function *Personality : MMI->getPersonalities()) { - if (Personality) { - MCSymbol *NLPSym = - getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMIMacho.getGVStubEntry(NLPSym); - StubSym = - MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true); - } - } - } - - // Output stubs for dynamically-linked functions. - MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList(); - - // Output macho stubs for external and common global variables. - if (!Stubs.empty()) { - // Switch with ".non_lazy_symbol_pointer" directive. - OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(isPPC64 ? Align(8) : Align(4)); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer->EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), - MCSA_IndirectSymbol); - - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4 /*size*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info - // pointers - // need to be indirect and pc-rel. We accomplish this by using NLPs. - // However, sometimes the types are local to the file. So we need to - // fill in the value for the NLP in those cases. - OutStreamer->EmitValue( - MCSymbolRefExpr::create(MCSym.getPointer(), OutContext), - isPPC64 ? 
8 : 4 /*size*/); - } - - Stubs.clear(); - OutStreamer->AddBlankLine(); - } - } - - // Funny Darwin hack: This flag tells the linker that no global symbols - // contain code that falls through to other global symbols (e.g. the obvious - // implementation of multiple entry points). If this doesn't occur, the - // linker can safely perform dead code stripping. Since LLVM never generates - // code that does this, it is always safe to set. - OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - - return AsmPrinter::doFinalization(M); -} - void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { // Get the function descriptor symbol. CurrentFnDescSym = getSymbol(&MF.getFunction()); @@ -1735,7 +1591,7 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { return AsmPrinter::SetupMachineFunction(MF); } -void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { +void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) { // Early error checking limiting what is supported. if (GV->isThreadLocal()) report_fatal_error("Thread local not yet supported on AIX."); @@ -1745,22 +1601,51 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GV->hasComdat()) report_fatal_error("COMDAT not yet supported by AIX."); +} + +const MCExpr *PPCAIXAsmPrinter::lowerConstant(const Constant *CV) { + if (const Function *F = dyn_cast(CV)) { + MCSymbolXCOFF *FSym = cast(getSymbol(F)); + if (!FSym->hasContainingCsect()) { + const XCOFF::StorageClass SC = + F->isDeclaration() + ? TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F) + : XCOFF::C_HIDEXT; + MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection( + FSym->getName(), XCOFF::XMC_DS, + F->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD, SC, + SectionKind::getData()); + FSym->setContainingCsect(Csect); + } + return MCSymbolRefExpr::create( + FSym->getContainingCsect()->getQualNameSymbol(), OutContext); + } + return PPCAsmPrinter::lowerConstant(CV); +} + +void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + ValidateGV(GV); + + // External global variables are already handled. + if (!GV->hasInitializer()) + return; + + // Create the symbol, set its storage class. + MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); + GVSym->setStorageClass( + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); - if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData()) + if ((!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) || + GVKind.isMergeable2ByteCString() || GVKind.isMergeable4ByteCString()) report_fatal_error("Encountered a global variable kind that is " "not supported yet."); // Create the containing csect and switch to it. - MCSectionXCOFF *CSect = cast( + MCSectionXCOFF *Csect = cast( getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); - OutStreamer->SwitchSection(CSect); - - // Create the symbol, set its storage class, and emit it. 
-  MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV));
-  GVSym->setStorageClass(
-      TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV));
-  GVSym->setContainingCsect(CSect);
+  OutStreamer->SwitchSection(Csect);
+  GVSym->setContainingCsect(Csect);

   const DataLayout &DL = GV->getParent()->getDataLayout();
@@ -1771,9 +1656,10 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());

     if (GVKind.isBSSLocal())
-      OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align);
+      OutStreamer->EmitXCOFFLocalCommonSymbol(
+          GVSym, Size, Csect->getQualNameSymbol(), Align);
     else
-      OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+      OutStreamer->EmitCommonSymbol(Csect->getQualNameSymbol(), Size, Align);
     return;
   }
@@ -1797,7 +1683,10 @@ void PPCAIXAsmPrinter::EmitFunctionDescriptor() {
   OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
                          PointerSize);
   // Emit TOC base address.
-  MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
+  const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection(
+      StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
+      SectionKind::getData());
+  const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol();
   OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
                          PointerSize);
   // Emit a null environment pointer.
@@ -1813,15 +1702,90 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
     return;

   // Emit TOC base.
-  MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]"));
   MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection(
       StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT,
       SectionKind::getData());
-  cast<MCSymbolXCOFF>(TOCBaseSym)->setContainingCsect(TOCBaseSection);
+  // The TOC-base always has 0 size, but 4 byte alignment.
+  TOCBaseSection->setAlignment(Align(4));
   // Switch to section to emit TOC base.
   OutStreamer->SwitchSection(TOCBaseSection);
+
+  PPCTargetStreamer &TS =
+      static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
+
+  for (auto &I : TOC) {
+    // Setup the csect for the current TC entry.
+    MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection(
+        cast<MCSymbolXCOFF>(I.first)->getUnqualifiedName(), XCOFF::XMC_TC,
+        XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
+    cast<MCSymbolXCOFF>(I.second)->setContainingCsect(TCEntry);
+    OutStreamer->SwitchSection(TCEntry);
+
+    OutStreamer->EmitLabel(I.second);
+    TS.emitTCEntry(*I.first);
+  }
 }

+MCSymbol *
+PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) {
+  const GlobalObject *GO = nullptr;
+
+  // If the MO is a function or certain kinds of globals, we want to make sure
+  // to refer to the csect symbol, otherwise we can just do the default handling.
+  if (MO.getType() != MachineOperand::MO_GlobalAddress ||
+      !(GO = dyn_cast<GlobalObject>(MO.getGlobal())))
+    return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO);
+
+  // Do an early error check for globals we don't support. This will go away
+  // eventually.
+  const auto *GV = dyn_cast<GlobalVariable>(GO);
+  if (GV) {
+    ValidateGV(GV);
+  }
+
+  MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(getSymbol(GO));
+
+  // If the global object is a global variable without an initializer or is a
+  // declaration of a function, then XSym is an externally referenced symbol.
+  // Hence we may need to explicitly create a MCSectionXCOFF for it so that we
+  // can return its symbol later.
+  if (GO->isDeclaration()) {
+    if (!XSym->hasContainingCsect()) {
+      // Make sure the storage class is set.
+      const XCOFF::StorageClass SC =
+          TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+      XSym->setStorageClass(SC);
+
+      MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection(
+          XSym->getName(), isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA,
+          XCOFF::XTY_ER, SC, SectionKind::getMetadata());
+      XSym->setContainingCsect(Csect);
+    }
+
+    return XSym->getContainingCsect()->getQualNameSymbol();
+  }
+
+  // Handle initialized global variables and defined functions.
+  SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
+
+  if (GOKind.isText()) {
+    // If the MO is a function, we want to make sure to refer to the function
+    // descriptor csect.
+    return OutStreamer->getContext()
+        .getXCOFFSection(XSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD,
+                         XCOFF::C_HIDEXT, SectionKind::getData())
+        ->getQualNameSymbol();
+  } else if (GOKind.isCommon() || GOKind.isBSSLocal()) {
+    // If the operand is a common symbol, then we should refer to the csect
+    // symbol.
+    return cast<MCSectionXCOFF>(
+               getObjFileLowering().SectionForGlobal(GO, GOKind, TM))
+        ->getQualNameSymbol();
+  }
+
+  // Other global variables are referred to by labels inside a single csect,
+  // so refer to the label directly.
+  return getSymbol(GV);
+}

 /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
 /// for a MachineFunction to the given output stream, in a format that the
@@ -1830,8 +1794,6 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) {
 static AsmPrinter *
 createPPCAsmPrinterPass(TargetMachine &tm,
                         std::unique_ptr<MCStreamer> &&Streamer) {
-  if (tm.getTargetTriple().isMacOSX())
-    return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
   if (tm.getTargetTriple().isOSAIX())
     return new PPCAIXAsmPrinter(tm, std::move(Streamer));

@@ -1839,7 +1801,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
 }

 // Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() { TargetRegistry::RegisterAsmPrinter(getThePPC32Target(), createPPCAsmPrinterPass); TargetRegistry::RegisterAsmPrinter(getThePPC64Target(), diff --git a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp index d325b078979f..109b665e0d57 100644 --- a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 2b8d9b87724f..4ce705300e1b 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -33,10 +33,8 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Constants.h" @@ -47,6 +45,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -54,6 +53,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #ifndef NDEBUG diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 06a4d183e781..4c608520e265 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -782,8 +782,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; - bool needsCFI = MMI.hasDebugInfo() || - MF.getFunction().needsUnwindTableEntry(); + bool needsCFI = MF.needsFrameMoves(); // Get processor type. bool isPPC64 = Subtarget.isPPC64(); @@ -2442,10 +2441,6 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, } unsigned PPCFrameLowering::getTOCSaveOffset() const { - if (Subtarget.isAIXABI()) - // TOC save/restore is normally handled by the linker. - // Indirect calls should hit this limitation. - report_fatal_error("TOC save is not implemented on AIX yet."); return TOCSaveOffset; } diff --git a/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 391ebcc1a143..ffaa3e05c847 100644 --- a/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -158,7 +158,7 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { // new group. if (isLoadAfterStore(SU) && CurSlots < 6) { unsigned Directive = - DAG->MF.getSubtarget().getDarwinDirective(); + DAG->MF.getSubtarget().getCPUDirective(); // If we're using a special group-terminating nop, then we need only one. 
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || @@ -218,7 +218,7 @@ void PPCDispatchGroupSBHazardRecognizer::Reset() { void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { unsigned Directive = - DAG->MF.getSubtarget().getDarwinDirective(); + DAG->MF.getSubtarget().getCPUDirective(); // If the group has now filled all of its slots, or if we're using a special // group-terminating nop, the group is complete. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4ad6c88233fe..776ec52e2604 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -138,9 +138,9 @@ namespace { /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; - const PPCSubtarget *PPCSubTarget; - const PPCTargetLowering *PPCLowering; - unsigned GlobalBaseReg; + const PPCSubtarget *PPCSubTarget = nullptr; + const PPCTargetLowering *PPCLowering = nullptr; + unsigned GlobalBaseReg = 0; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) @@ -204,6 +204,7 @@ namespace { bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); bool tryIntCompareInGPR(SDNode *N); + bool tryAndWithMask(SDNode *N); // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into // an X-Form load instruction with the offset being a relocation coming from @@ -309,7 +310,6 @@ namespace { errs() << "ConstraintID: " << ConstraintID << "\n"; llvm_unreachable("Unexpected asm memory constraint"); case InlineAsm::Constraint_es: - case InlineAsm::Constraint_i: case InlineAsm::Constraint_m: case InlineAsm::Constraint_o: case InlineAsm::Constraint_Q: @@ -512,13 +512,14 @@ static bool isInt64Immediate(SDValue N, uint64_t &Imm) { return isInt64Immediate(N.getNode(), Imm); } -static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, +static unsigned getBranchHint(unsigned PCC, + const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB) { assert(isa(DestMBB)); - if (!FuncInfo->BPI) return PPC::BR_NO_HINT; + if (!FuncInfo.BPI) return PPC::BR_NO_HINT; - const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); + const BasicBlock *BB = FuncInfo.MBB->getBasicBlock(); const Instruction *BBTerm = BB->getTerminator(); if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; @@ -526,8 +527,8 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const BasicBlock *TBB = BBTerm->getSuccessor(0); const BasicBlock *FBB = BBTerm->getSuccessor(1); - auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB); - auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB); + auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB); + auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB); // We only want to handle cases which are easy to predict at static time, e.g. 
// C++ throw statement, that is very likely not taken, or calling never @@ -547,7 +548,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) return PPC::BR_NO_HINT; - LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() + LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() << "::" << BB->getName() << "'\n" << " -> " << TBB->getName() << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"); @@ -1044,7 +1045,7 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { if (Use->isMachineOpcode()) return 0; MaxTruncation = - std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); + std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits()); continue; case ISD::STORE: { if (Use->isMachineOpcode()) @@ -1399,11 +1400,14 @@ class BitPermutationSelector { for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = (*LHSBits)[i]; - // These bits are known to be zero. + // These bits are known to be zero but the AssertZext may be from a value + // that already has some constant zero bits (i.e. from a masking and). for (unsigned i = NumValidBits; i < NumBits; ++i) - Bits[i] = ValueBit((*LHSBits)[i].getValue(), - (*LHSBits)[i].getValueBitIndex(), - ValueBit::VariableKnownToBeZero); + Bits[i] = (*LHSBits)[i].hasValue() + ? ValueBit((*LHSBits)[i].getValue(), + (*LHSBits)[i].getValueBitIndex(), + ValueBit::VariableKnownToBeZero) + : ValueBit(ValueBit::ConstZero); return std::make_pair(Interesting, &Bits); } @@ -1811,11 +1815,14 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) - ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, - VRot, getI32Imm(ANDIMask, dl)), 0); + ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, + VRot, getI32Imm(ANDIMask, dl)), + 0); if (ANDISMask != 0) - ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, - VRot, getI32Imm(ANDISMask, dl)), 0); + ANDISVal = + SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot, + getI32Imm(ANDISMask, dl)), + 0); SDValue TotalVal; if (!ANDIVal) @@ -1904,11 +1911,14 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) - ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, - Res, getI32Imm(ANDIMask, dl)), 0); + ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, + Res, getI32Imm(ANDIMask, dl)), + 0); if (ANDISMask != 0) - ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, - Res, getI32Imm(ANDISMask, dl)), 0); + ANDISVal = + SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res, + getI32Imm(ANDISMask, dl)), + 0); if (!ANDIVal) Res = ANDISVal; @@ -2181,15 +2191,16 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) - ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, + ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, ExtendToInt64(VRot, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) - ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - ExtendToInt64(VRot, dl), - getI32Imm(ANDISMask, dl)), - 0); + ANDISVal = + SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, + ExtendToInt64(VRot, dl), + getI32Imm(ANDISMask, dl)), + 0); if (!ANDIVal) TotalVal = ANDISVal; @@ -2330,11 +2341,16 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) - ANDIVal = 
SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); + ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, + ExtendToInt64(Res, dl), + getI32Imm(ANDIMask, dl)), + 0); if (ANDISMask != 0) - ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); + ANDISVal = + SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, + ExtendToInt64(Res, dl), + getI32Imm(ANDISMask, dl)), + 0); if (!ANDIVal) Res = ANDISVal; @@ -2385,7 +2401,7 @@ class BitPermutationSelector { SmallVector Bits; - bool NeedMask; + bool NeedMask = false; SmallVector RLAmt; SmallVector BitGroups; @@ -2393,7 +2409,7 @@ class BitPermutationSelector { DenseMap, ValueRotInfo> ValueRots; SmallVector ValueRotsVec; - SelectionDAG *CurDAG; + SelectionDAG *CurDAG = nullptr; public: BitPermutationSelector(SelectionDAG *DAG) @@ -2623,8 +2639,9 @@ SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { assert((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?"); WideOp = - SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, - MVT::i64, MVT::Glue, LHS, RHS), 0); + SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc, + dl, MVT::i64, MVT::Glue, LHS, RHS), + 0); } // Select this node to a single bit from CR0 set by the record-form node @@ -3597,7 +3614,7 @@ SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, if (ConvOpts == SetccInGPROpts::ZExtInvert || ConvOpts == SetccInGPROpts::SExtInvert) - CC = ISD::getSetCCInverse(CC, true); + CC = ISD::getSetCCInverse(CC, InputVT); bool Inputs32Bit = InputVT == MVT::i32; @@ -3832,7 +3849,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } -static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { +static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, + const PPCSubtarget *Subtarget) { + // For SPE instructions, the result is in GT bit of the CR + bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint(); + switch (CC) { case ISD::SETUEQ: case ISD::SETONE: @@ -3841,17 +3862,23 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { llvm_unreachable("Should be lowered by legalize!"); default: llvm_unreachable("Unknown condition!"); case ISD::SETOEQ: - case ISD::SETEQ: return PPC::PRED_EQ; + case ISD::SETEQ: + return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ; case ISD::SETUNE: - case ISD::SETNE: return PPC::PRED_NE; + case ISD::SETNE: + return UseSPE ? PPC::PRED_LE : PPC::PRED_NE; case ISD::SETOLT: - case ISD::SETLT: return PPC::PRED_LT; + case ISD::SETLT: + return UseSPE ? PPC::PRED_GT : PPC::PRED_LT; case ISD::SETULE: - case ISD::SETLE: return PPC::PRED_LE; + case ISD::SETLE: + return UseSPE ? PPC::PRED_LE : PPC::PRED_LE; case ISD::SETOGT: - case ISD::SETGT: return PPC::PRED_GT; + case ISD::SETGT: + return UseSPE ? PPC::PRED_GT : PPC::PRED_GT; case ISD::SETUGE: - case ISD::SETGE: return PPC::PRED_GE; + case ISD::SETGE: + return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; case ISD::SETO: return PPC::PRED_NU; case ISD::SETUO: return PPC::PRED_UN; // These two are invalid for floating point. Assume we have int. 
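A concrete illustration of the SPE mapping above may help. The SPE floating-point compares (efscmpeq, efscmpgt, efscmplt and their double/vector variants) record their result only in the GT bit of the target CR field, so both the "equal" and "greater" predicates end up testing PRED_GT, and their negations test PRED_LE. A hedged sketch of the branch sequences this produces (assumed codegen, abbreviated; not output from this patch):

// efscmp* set just the GT bit (bit 1) of the chosen CR field; nothing else
// in the field is meaningful afterwards.
//
//   efscmpeq 0, 3, 4      // cr0.gt <- (r3 == r4), SPE single-precision
//   bt 1, .LBB0_taken     // SETEQ -> PRED_GT: branch if cr0.gt is set
//
//   efscmpeq 0, 3, 4
//   bf 1, .LBB0_taken     // SETNE -> PRED_LE: branch if cr0.gt is clear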
@@ -4344,6 +4371,142 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, return true; } +bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) { + if (N->getOpcode() != ISD::AND) + return false; + + SDLoc dl(N); + SDValue Val = N->getOperand(0); + unsigned Imm, Imm2, SH, MB, ME; + uint64_t Imm64; + + // If this is an and of a value rotated between 0 and 31 bits and then and'd + // with a mask, emit rlwinm + if (isInt32Immediate(N->getOperand(1), Imm) && + isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { + SDValue Val = N->getOperand(0).getOperand(0); + SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; + } + + // If this is just a masked value where the input is not handled, and + // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm + if (isInt32Immediate(N->getOperand(1), Imm)) { + if (isRunOfOnes(Imm, MB, ME) && + N->getOperand(0).getOpcode() != ISD::ROTL) { + SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl) }; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; + } + // AND X, 0 -> 0, not "rlwinm 32". + if (Imm == 0) { + ReplaceUses(SDValue(N, 0), N->getOperand(1)); + return true; + } + + // ISD::OR doesn't get all the bitfield insertion fun. + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. + if (N->getOperand(0).getOpcode() == ISD::OR && + isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. + // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. + // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. + unsigned MB, ME; + if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { + SDValue Ops[] = { N->getOperand(0).getOperand(0), + N->getOperand(0).getOperand(1), + getI32Imm(0, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl) }; + ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); + return true; + } + } + } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) { + // If this is a 64-bit zero-extension mask, emit rldicl. + if (isMask_64(Imm64)) { + MB = 64 - countTrailingOnes(Imm64); + SH = 0; + + if (Val.getOpcode() == ISD::ANY_EXTEND) { + auto Op0 = Val.getOperand(0); + if ( Op0.getOpcode() == ISD::SRL && + isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { + + auto ResultType = Val.getNode()->getValueType(0); + auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, + ResultType); + SDValue IDVal (ImDef, 0); + + Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, + ResultType, IDVal, Op0.getOperand(0), + getI32Imm(1, dl)), 0); + SH = 64 - Imm; + } + } + + // If the operand is a logical right shift, we can fold it into this + // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) + // for n <= mb. The right shift is really a left rotate followed by a + // mask, and this mask is a more-restrictive sub-mask of the mask implied + // by the shift. 
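A worked instance of the shift-into-rotate fold described in the comment above (a sketch with concrete numbers, not code from the patch):

// Take (x >> 8) & 0xFFFF on i64. The mask gives MB = 64 - 16 = 48 and the
// shift gives n = 8; since n <= MB, the srl folds into the rldicl itself:
//
//   uint64_t f(uint64_t x) { return (x >> 8) & 0xFFFF; }
//
// selects to the single instruction
//
//   rldicl 3, 3, 56, 48   // rotate left by 64-8=56, clear the top 48 bits
//
// The rotate brings bits 8..23 of x into the low 16 positions, and the 8
// low-order bits that wrapped around to the top are cleared by the MB = 48
// mask -- which is exactly why the fold requires n <= MB.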
+      if (Val.getOpcode() == ISD::SRL &&
+          isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+        assert(Imm < 64 && "Illegal shift amount");
+        Val = Val.getOperand(0);
+        SH = 64 - Imm;
+      }
+
+      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+      CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+      return true;
+    } else if (isMask_64(~Imm64)) {
+      // If this is a negated 64-bit zero-extension mask,
+      // i.e. the immediate is a sequence of ones from the most significant
+      // side and all zero for the remainder, we should use rldicr.
+      MB = 63 - countTrailingOnes(~Imm64);
+      SH = 0;
+      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+      CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+      return true;
+    }
+
+    // The immediate is not a 16-bit imm, which means we would need at least
+    // two instructions using "and". Try to exploit it with rotate-and-mask
+    // instructions instead.
+    if (isRunOfOnes64(Imm64, MB, ME)) {
+      if (MB >= 32 && MB <= ME) {
+        //                MB  ME
+        // +----------------------+
+        // |xxxxxxxxxxx00011111000|
+        // +----------------------+
+        //  0         32         64
+        // We can only do it if MB is no smaller than 32 and MB <= ME,
+        // as RLWINM will replace the contents of [0 - 32) with [32 - 64)
+        // even if we didn't rotate it.
+        SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
+                          getI64Imm(ME - 32, dl) };
+        CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
+        return true;
+      }
+      // TODO - handle it with rldicl + rldicl
+    }
+  }
+
+  return false;
+}
+
 // Select - Convert the specified operand from a target-independent to a
 // target-specific node if it hasn't already been changed.
 void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4565,121 +4728,13 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     }
   }

-  case ISD::AND: {
-    unsigned Imm, Imm2, SH, MB, ME;
-    uint64_t Imm64;
-
-    // If this is an and of a value rotated between 0 and 31 bits and then and'd
-    // with a mask, emit rlwinm
-    if (isInt32Immediate(N->getOperand(1), Imm) &&
-        isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
-      SDValue Val = N->getOperand(0).getOperand(0);
-      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
-                        getI32Imm(ME, dl) };
-      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
-      return;
-    }
-    // If this is just a masked value where the input is not handled above, and
-    // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
-    if (isInt32Immediate(N->getOperand(1), Imm) &&
-        isRunOfOnes(Imm, MB, ME) &&
-        N->getOperand(0).getOpcode() != ISD::ROTL) {
-      SDValue Val = N->getOperand(0);
-      SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
-                        getI32Imm(ME, dl) };
-      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
-      return;
-    }
-    // If this is a 64-bit zero-extension mask, emit rldicl.
- if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && - isMask_64(Imm64)) { - SDValue Val = N->getOperand(0); - MB = 64 - countTrailingOnes(Imm64); - SH = 0; - - if (Val.getOpcode() == ISD::ANY_EXTEND) { - auto Op0 = Val.getOperand(0); - if ( Op0.getOpcode() == ISD::SRL && - isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { - - auto ResultType = Val.getNode()->getValueType(0); - auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, - ResultType); - SDValue IDVal (ImDef, 0); - - Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, - ResultType, IDVal, Op0.getOperand(0), - getI32Imm(1, dl)), 0); - SH = 64 - Imm; - } - } - - // If the operand is a logical right shift, we can fold it into this - // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) - // for n <= mb. The right shift is really a left rotate followed by a - // mask, and this mask is a more-restrictive sub-mask of the mask implied - // by the shift. - if (Val.getOpcode() == ISD::SRL && - isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { - assert(Imm < 64 && "Illegal shift amount"); - Val = Val.getOperand(0); - SH = 64 - Imm; - } - - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); - return; - } - // If this is a negated 64-bit zero-extension mask, - // i.e. the immediate is a sequence of ones from most significant side - // and all zero for reminder, we should use rldicr. - if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && - isMask_64(~Imm64)) { - SDValue Val = N->getOperand(0); - MB = 63 - countTrailingOnes(~Imm64); - SH = 0; - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); - return; - } - - // AND X, 0 -> 0, not "rlwinm 32". - if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { - ReplaceUses(SDValue(N, 0), N->getOperand(1)); + case ISD::AND: + // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr + if (tryAndWithMask(N)) return; - } - // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a - // bitfield insert. - if (isInt32Immediate(N->getOperand(1), Imm) && - N->getOperand(0).getOpcode() == ISD::OR && - isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { - // The idea here is to check whether this is equivalent to: - // (c1 & m) | (x & ~m) - // where m is a run-of-ones mask. The logic here is that, for each bit in - // c1 and c2: - // - if both are 1, then the output will be 1. - // - if both are 0, then the output will be 0. - // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will - // come from x. - // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will - // be 0. - // If that last condition is never the case, then we can form m from the - // bits that are the same between c1 and c2. - unsigned MB, ME; - if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { - SDValue Ops[] = { N->getOperand(0).getOperand(0), - N->getOperand(0).getOperand(1), - getI32Imm(0, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; - ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); - return; - } - } // Other cases are autogenerated. 
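Two concrete instances of the mask selections above (the same logic now lives in tryAndWithMask); these are illustrative sketches with hand-picked constants, not added test coverage:

// (and (or x, c1), c2) -> rlwimi, with c1 = 0x00000F00 and c2 = 0xFFFFFF00:
//   ~c2 & c1       == 0, so no bit has c1 = 1 while c2 = 0, and
//   m = ~(c1 ^ c2) == 0x00000FFF, a run of ones (MB = 20, ME = 31).
// The result is (c1 & m) | (x & ~m): the low 12 bits come from the constant,
// the rest from x -- a bitfield insert:
//   li     4, 0xF00          // materialize c1
//   rlwimi 3, 4, 0, 20, 31   // insert bits 20..31 (IBM numbering) into x
//
// x & 0xFFFF000000000000 -> rldicr: ~mask has 48 trailing ones, so
// MB = 63 - 48 = 15 and the selection keeps only bits 0..15, i.e. the 16
// most significant bits:
//   rldicr 3, 3, 0, 15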
break; - } case ISD::OR: { if (N->getValueType(0) == MVT::i32) if (tryBitfieldInsert(N)) @@ -4781,24 +4836,24 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; } // FIXME: Remove this once the ANDI glue bug is fixed: - case PPCISD::ANDIo_1_EQ_BIT: - case PPCISD::ANDIo_1_GT_BIT: { + case PPCISD::ANDI_rec_1_EQ_BIT: + case PPCISD::ANDI_rec_1_GT_BIT: { if (!ANDIGlueBug) break; EVT InVT = N->getOperand(0).getValueType(); assert((InVT == MVT::i64 || InVT == MVT::i32) && - "Invalid input type for ANDIo_1_EQ_BIT"); + "Invalid input type for ANDI_rec_1_EQ_BIT"); - unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo; + unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec; SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, N->getOperand(0), CurDAG->getTargetConstant(1, dl, InVT)), 0); SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); - SDValue SRIdxVal = - CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? - PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); + SDValue SRIdxVal = CurDAG->getTargetConstant( + N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt, + dl, MVT::i32); CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); @@ -4889,7 +4944,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } - unsigned BROpc = getPredicateForSetCC(CC); + unsigned BROpc = + getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget); unsigned SelectCCOp; if (N->getValueType(0) == MVT::i32) @@ -5002,7 +5058,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Prevent PPC::PRED_* from being selected into LI. unsigned PCC = cast(N->getOperand(1))->getZExtValue(); if (EnableBranchHint) - PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); + PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), @@ -5012,7 +5068,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::BR_CC: { ISD::CondCode CC = cast(N->getOperand(1))->get(); - unsigned PCC = getPredicateForSetCC(CC); + unsigned PCC = + getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget); if (N->getOperand(2).getValueType() == MVT::i1) { unsigned Opc; @@ -5045,7 +5102,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } if (EnableBranchHint) - PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); + PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4)); SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, @@ -6181,8 +6238,8 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, // For ANDI and ANDIS, the higher-order bits are zero if either that is true // of the first operand, or if the second operand is positive (so that it is // not sign extended). 
- if (Op32.getMachineOpcode() == PPC::ANDIo || - Op32.getMachineOpcode() == PPC::ANDISo) { + if (Op32.getMachineOpcode() == PPC::ANDI_rec || + Op32.getMachineOpcode() == PPC::ANDIS_rec) { SmallPtrSet ToPromote1; bool Op0OK = PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); @@ -6304,8 +6361,12 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { case PPC::ORI: NewOpcode = PPC::ORI8; break; case PPC::ORIS: NewOpcode = PPC::ORIS8; break; case PPC::AND: NewOpcode = PPC::AND8; break; - case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break; - case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break; + case PPC::ANDI_rec: + NewOpcode = PPC::ANDI8_rec; + break; + case PPC::ANDIS_rec: + NewOpcode = PPC::ANDIS8_rec; + break; } // Note: During the replacement process, the nodes will be in an diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8cf6a660b08b..60ed72e1018b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -52,6 +52,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" @@ -66,6 +67,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -119,6 +121,9 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); static cl::opt EnableQuadPrecision("enable-ppc-quad-precision", cl::desc("enable quad precision float support on ppc"), cl::Hidden); +static cl::opt UseAbsoluteJumpTables("ppc-use-absolute-jumptables", +cl::desc("use absolute jump tables on ppc"), cl::Hidden); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); @@ -132,10 +137,6 @@ extern cl::opt ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - // Use _setjmp/_longjmp instead of setjmp/longjmp. - setUseUnderscoreSetJmp(true); - setUseUnderscoreLongJmp(true); - // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. 
bool isPPC64 = Subtarget.isPPC64(); @@ -389,6 +390,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::LRINT, MVT::f64, Legal); + setOperationAction(ISD::LRINT, MVT::f32, Legal); + setOperationAction(ISD::LLRINT, MVT::f64, Legal); + setOperationAction(ISD::LLRINT, MVT::f32, Legal); + setOperationAction(ISD::LROUND, MVT::f64, Legal); + setOperationAction(ISD::LROUND, MVT::f32, Legal); + setOperationAction(ISD::LLROUND, MVT::f64, Legal); + setOperationAction(ISD::LLROUND, MVT::f32, Legal); + } } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); @@ -548,6 +559,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } + if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + } + if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. @@ -702,6 +720,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (!Subtarget.hasP8Altivec()) setOperationAction(ISD::ABS, MVT::v2i64, Expand); + // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w). + if (Subtarget.hasAltivec()) + for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8}) + setOperationAction(ISD::ROTL, VT, Legal); + // With hasP8Altivec set, we can lower ISD::ROTL to vrld. + if (Subtarget.hasP8Altivec()) + setOperationAction(ISD::ROTL, MVT::v2i64, Legal); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); @@ -756,13 +782,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + // The nearbyint variants are not allowed to raise the inexact exception + // so we can only code-gen them with unsafe math. 
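The constraint being encoded just below is the ISO C one: nearbyint must not raise the inexact floating-point exception, while the VSX round-to-integer instructions it would map to may set that flag. A short illustration of the contract (a self-contained sketch; nothing here is from the patch):

#include <cfenv>
#include <cmath>
#pragma STDC FENV_ACCESS ON

// nearbyint(0.5) must leave FE_INEXACT clear even though the result (0.0)
// differs from the input; rint(0.5) is allowed to raise it. Lowering
// FNEARBYINT to a hardware rounding instruction that can set the flag is
// therefore only sound under UnsafeFPMath, where the floating-point
// environment is assumed to be unobserved.
bool nearbyintStaysExact() {
  std::feclearexcept(FE_ALL_EXCEPT);
  (void)std::nearbyint(0.5);
  return std::fetestexcept(FE_INEXACT) == 0; // expected: true
}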
+ if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + } + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); @@ -910,12 +946,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FREM, MVT::f128, Expand); } setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); - + setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); + setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); + setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); + setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); } if (Subtarget.hasP9Altivec()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } } @@ -1183,7 +1230,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.isDarwin()) setPrefFunctionAlignment(Align(16)); - switch (Subtarget.getDarwinDirective()) { + switch (Subtarget.getCPUDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_A2: @@ -1198,6 +1245,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); break; @@ -1212,15 +1260,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses same threshold of 128 bytes (= 32 word stores). - if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || - Subtarget.getDarwinDirective() == PPC::DIR_E5500) { + if (Subtarget.getCPUDirective() == PPC::DIR_E500mc || + Subtarget.getCPUDirective() == PPC::DIR_E5500) { MaxStoresPerMemset = 32; MaxStoresPerMemsetOptSize = 16; MaxStoresPerMemcpy = 32; MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; - } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { + } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) { // The A2 also benefits from (very) aggressive inlining of memcpy and // friends. The overhead of a the function call, even when warm, can be // over one hundred cycles. 
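For context on the thresholds set here: MaxStoresPerMemcpy and friends bound how many scalar stores SelectionDAG will emit before falling back to a libcall, so raising them makes small fixed-size copies expand inline. A hedged example of the effect, assuming 32-bit e500mc codegen at -O2:

#include <cstring>

struct Header { char raw[64]; };

// 64 bytes is 16 word-sized stores on a 32-bit target: well under the raised
// MaxStoresPerMemcpy = 32, so this memcpy is expanded into inline load/store
// pairs instead of paying the function-call overhead the comment above
// describes for the A2.
void copyHeader(Header *dst, const Header *src) {
  std::memcpy(dst, src, sizeof(Header));
}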
@@ -1294,6 +1342,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; + case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; + case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; @@ -1314,7 +1364,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; - case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; @@ -1345,8 +1394,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; - case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; - case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; + case PPCISD::ANDI_rec_1_EQ_BIT: + return "PPCISD::ANDI_rec_1_EQ_BIT"; + case PPCISD::ANDI_rec_1_GT_BIT: + return "PPCISD::ANDI_rec_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -2699,9 +2750,9 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast(Op); const Constant *C = CP->getConstVal(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.is64BitELFABI()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return getTOCEntry(DAG, SDLoc(CP), GA); @@ -2735,14 +2786,16 @@ unsigned PPCTargetLowering::getJumpTableEncoding() const { } bool PPCTargetLowering::isJumpTableRelative() const { - if (Subtarget.isPPC64()) + if (UseAbsoluteJumpTables) + return false; + if (Subtarget.isPPC64() || Subtarget.isAIXABI()) return true; return TargetLowering::isJumpTableRelative(); } SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { - if (!Subtarget.isPPC64()) + if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) return TargetLowering::getPICJumpTableRelocBase(Table, DAG); switch (getTargetMachine().getCodeModel()) { @@ -2759,7 +2812,7 @@ const MCExpr * PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { - if (!Subtarget.isPPC64()) + if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); switch (getTargetMachine().getCodeModel()) { @@ -2775,9 +2828,9 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
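These LowerConstantPool/LowerJumpTable/LowerBlockAddress changes route AIX through the same TOC indirection already used by 64-bit ELF: getTOCEntry() turns the address into a load from a synthesized TOC slot. A hedged sketch of the 64-bit ELF shape (medium code model; exact directives vary, and AIX spells the entry differently, but the idea is identical):

// What the TOC-entry lowering amounts to at the assembly level (sketch):
//
//   addis 3, 2, .LC0@toc@ha    // r2 holds the TOC base pointer
//   ld    3, .LC0@toc@l(3)     // load the constant pool's address from TOC
//
//   .section .toc,"aw"
// .LC0:
//   .tc .LCPI0_0[TC], .LCPI0_0 // TOC entry synthesized for the constant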
- if (Subtarget.is64BitELFABI()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), GA); @@ -2804,9 +2857,9 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, BlockAddressSDNode *BASDN = cast(Op); const BlockAddress *BA = BASDN->getBlockAddress(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual BlockAddress is stored in the TOC. - if (Subtarget.is64BitELFABI()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), GA); @@ -3129,11 +3182,17 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.isAIXABI()) + report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX."); + return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.isAIXABI()) + report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX."); + SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -3394,15 +3453,16 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + if (Subtarget.isAIXABI()) + return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - else if (Subtarget.is32BitELFABI()) + if (Subtarget.is32BitELFABI()) return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - // FIXME: We are using this for both AIX and Darwin. We should add appropriate - // AIX testing, and rename it appropriately. return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } @@ -4934,213 +4994,6 @@ static bool isFunctionGlobalAddress(SDValue Callee) { return false; } -static unsigned -PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, - SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, - bool isPatchPoint, bool hasNest, - SmallVectorImpl> &RegsToPass, - SmallVectorImpl &Ops, std::vector &NodeTys, - ImmutableCallSite CS, const PPCSubtarget &Subtarget) { - bool isPPC64 = Subtarget.isPPC64(); - bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI(); - bool isAIXABI = Subtarget.isAIXABI(); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - - unsigned CallOpc = PPCISD::CALL; - - bool needIndirectCall = true; - if (!isSVR4ABI || !isPPC64) - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. 
- Callee = SDValue(Dest, 0); - needIndirectCall = false; - } - - // PC-relative references to external symbols should go through $stub, unless - // we're building with the leopard linker or later, which automatically - // synthesizes these stubs. - const TargetMachine &TM = DAG.getTarget(); - const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); - const GlobalValue *GV = nullptr; - if (auto *G = dyn_cast(Callee)) - GV = G->getGlobal(); - bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); - bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. - if (isFunctionGlobalAddress(Callee)) { - GlobalAddressSDNode *G = cast(Callee); - - // A call to a TLS address is actually an indirect call to a - // thread-specific pointer. - unsigned OpFlags = 0; - if (UsePlt) - OpFlags = PPCII::MO_PLT; - - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType(), 0, OpFlags); - needIndirectCall = false; - } - - if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - unsigned char OpFlags = 0; - - if (UsePlt) - OpFlags = PPCII::MO_PLT; - - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), - OpFlags); - needIndirectCall = false; - } - - if (isPatchPoint) { - // We'll form an invalid direct call when lowering a patchpoint; the full - // sequence for an indirect call is complicated, and many of the - // instructions introduced might have side effects (and, thus, can't be - // removed later). The call itself will be removed as soon as the - // argument/return lowering is complete, so the fact that it has the wrong - // kind of operands should not really matter. - needIndirectCall = false; - } - - if (needIndirectCall) { - // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair - // to do the call, we can't use PPCISD::CALL. - SDValue MTCTROps[] = {Chain, Callee, InFlag}; - - if (is64BitELFv1ABI) { - // Function pointers in the 64-bit SVR4 ABI do not point to the function - // entry point, but to the function descriptor (the function entry point - // address is part of the function descriptor though). - // The function descriptor is a three doubleword structure with the - // following fields: function entry point, TOC base address and - // environment pointer. - // Thus for a call through a function pointer, the following actions need - // to be performed: - // 1. Save the TOC of the caller in the TOC save area of its stack - // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). - // 2. Load the address of the function entry point from the function - // descriptor. - // 3. Load the TOC of the callee from the function descriptor into r2. - // 4. Load the environment pointer from the function descriptor into - // r11. - // 5. Branch to the function entry point address. - // 6. On return of the callee, the TOC of the caller needs to be - // restored (this is done in FinishCall()). - // - // The loads are scheduled at the beginning of the call sequence, and the - // register copies are flagged together to ensure that no other - // operations can be scheduled in between. E.g. 
without flagging the - // copies together, a TOC access in the caller could be scheduled between - // the assignment of the callee TOC and the branch to the callee, which - // results in the TOC access going through the TOC of the callee instead - // of going through the TOC of the caller, which leads to incorrect code. - - // Load the address of the function entry point from the function - // descriptor. - SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); - if (LDChain.getValueType() == MVT::Glue) - LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); - - auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() - ? (MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant) - : MachineMemOperand::MONone; - - MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); - SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, - /* Alignment = */ 8, MMOFlags); - - // Load environment pointer into r11. - SDValue PtrOff = DAG.getIntPtrConstant(16, dl); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); - SDValue LoadEnvPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), - /* Alignment = */ 8, MMOFlags); - - SDValue TOCOff = DAG.getIntPtrConstant(8, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); - SDValue TOCPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), - /* Alignment = */ 8, MMOFlags); - - setUsesTOCBasePtr(DAG); - SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, - InFlag); - Chain = TOCVal.getValue(0); - InFlag = TOCVal.getValue(1); - - // If the function call has an explicit 'nest' parameter, it takes the - // place of the environment pointer. - if (!hasNest) { - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); - - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); - } - - MTCTROps[0] = Chain; - MTCTROps[1] = LoadFuncPtr; - MTCTROps[2] = InFlag; - } - - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, - makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); - InFlag = Chain.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Glue); - Ops.push_back(Chain); - CallOpc = PPCISD::BCTRL; - Callee.setNode(nullptr); - // Add use of X11 (holding environment pointer) - if (is64BitELFv1ABI && !hasNest) - Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); - // Add CTR register as callee so a bctr can be emitted later. - if (isTailCall) - Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); - } - - // If this is a direct call, pass the chain and the callee. - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); - } - // If this is a tail call add stack pointer delta. - if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register - // live into the call. - // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT. - if ((isSVR4ABI && isPPC64) || isAIXABI) { - setUsesTOCBasePtr(DAG); - - // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is - // no way to mark dependencies as implicit here. 
-    // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
-    if (!isPatchPoint)
-      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
-                                            : PPC::R2, PtrVT));
-  }
-
-  return CallOpc;
-}
-
 SDValue PPCTargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -5205,30 +5058,357 @@ SDValue PPCTargetLowering::LowerCallResult(
   return Chain;
 }

-SDValue PPCTargetLowering::FinishCall(
-    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
-    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
-    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
-    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
-    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
-    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
-  std::vector<EVT> NodeTys;
-  SmallVector<SDValue, 8> Ops;
-  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
-                                 SPDiff, isTailCall, isPatchPoint, hasNest,
-                                 RegsToPass, Ops, NodeTys, CS, Subtarget);
+static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
+                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
+  // PatchPoint calls are not indirect.
+  if (isPatchPoint)
+    return false;
+
+  if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
+    return false;
+
+  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
+  // because the immediate function pointer points to a descriptor instead of
+  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
+  // pointer immediate points to the global entry point, while the BLA would
+  // need to jump to the local entry point (see rL211174).
+  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
+      isBLACompatibleAddress(Callee, DAG))
+    return false;
+
+  return true;
+}
+
+static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
+                              bool isTailCall, const Function &Caller,
+                              const SDValue &Callee,
+                              const PPCSubtarget &Subtarget,
+                              const TargetMachine &TM) {
+  if (isTailCall)
+    return PPCISD::TC_RETURN;
+
+  // This is a call through a function pointer.
+  if (isIndirectCall) {
+    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
+    // indirect calls. The save of the caller's TOC pointer to the stack will be
+    // inserted into the DAG as part of call lowering. The restore of the TOC
+    // pointer is modeled by using a pseudo instruction for the call opcode that
+    // represents the two-instruction sequence of an indirect branch and link,
+    // immediately followed by a load of the TOC pointer from the stack save
+    // slot into gpr2.
+    if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+      return PPCISD::BCTRL_LOAD_TOC;
+
+    // An indirect call that does not need a TOC restore.
+    return PPCISD::BCTRL;
+  }
+
+  // The ABIs that maintain a TOC pointer across calls need to have a nop
+  // immediately following the call instruction if the caller and callee may
+  // have different TOC bases. At link time, if the linker determines the calls
+  // may not share a TOC base, the call is redirected to a trampoline inserted
+  // by the linker. The trampoline will (among other things) save the caller's
+  // TOC pointer at an ABI-designated offset in the linkage area and the linker
+  // will rewrite the nop to be a load of the TOC pointer from the linkage area
+  // into gpr2.
+  if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+    return callsShareTOCBase(&Caller, Callee, TM) ?
PPCISD::CALL + : PPCISD::CALL_NOP; + + return PPCISD::CALL; +} + +static bool isValidAIXExternalSymSDNode(StringRef SymName) { + return StringSwitch(SymName) + .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf", + "__moddi3", "__udivdi3", "__umoddi3", true) + .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true) + .Default(false); +} + +static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, + const SDLoc &dl, const PPCSubtarget &Subtarget) { + if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI()) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + return SDValue(Dest, 0); + + // Returns true if the callee is local, and false otherwise. + auto isLocalCallee = [&]() { + const GlobalAddressSDNode *G = dyn_cast(Callee); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + const GlobalValue *GV = G ? G->getGlobal() : nullptr; + + return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) && + !dyn_cast_or_null(GV); + }; + + // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in + // a static relocation model causes some versions of GNU LD (2.17.50, at + // least) to force BSS-PLT, instead of secure-PLT, even if all objects are + // built with secure-PLT. + bool UsePlt = + Subtarget.is32BitELFABI() && !isLocalCallee() && + Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_; + + // On AIX, direct function calls reference the symbol for the function's + // entry point, which is named by prepending a "." before the function's + // C-linkage name. + const auto getAIXFuncEntryPointSymbolSDNode = + [&](StringRef FuncName, bool IsDeclaration, + const XCOFF::StorageClass &SC) { + auto &Context = DAG.getMachineFunction().getMMI().getContext(); + + MCSymbolXCOFF *S = cast( + Context.getOrCreateSymbol(Twine(".") + Twine(FuncName))); + + if (IsDeclaration && !S->hasContainingCsect()) { + // On AIX, an undefined symbol needs to be associated with a + // MCSectionXCOFF to get the correct storage mapping class. + // In this case, XCOFF::XMC_PR. + MCSectionXCOFF *Sec = Context.getXCOFFSection( + S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC, + SectionKind::getMetadata()); + S->setContainingCsect(Sec); + } + + MVT PtrVT = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + return DAG.getMCSymbol(S, PtrVT); + }; + + if (isFunctionGlobalAddress(Callee)) { + const GlobalAddressSDNode *G = cast(Callee); + const GlobalValue *GV = G->getGlobal(); + + if (!Subtarget.isAIXABI()) + return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0, + UsePlt ? PPCII::MO_PLT : 0); + + assert(!isa(GV) && "IFunc is not supported on AIX."); + const GlobalObject *GO = cast(GV); + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(), + SC); + } + + if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + const char *SymName = S->getSymbol(); + if (!Subtarget.isAIXABI()) + return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), + UsePlt ? PPCII::MO_PLT : 0); + + // If there exists a user-declared function whose name is the same as the + // ExternalSymbol's, then we pick up the user-declared version. 
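The dot-name convention that getAIXFuncEntryPointSymbolSDNode implements is easy to state in isolation. A minimal sketch of the mapping, purely illustrative (the real code builds an MCSymbolXCOFF and a containing csect rather than a string):

#include <string>

// On AIX, a call to foo() branches to the symbol ".foo"; the undecorated
// name "foo" labels the function descriptor in the data section.
std::string aixEntryPointName(const std::string &LinkageName) {
  return "." + LinkageName; // e.g. "memcpy" -> ".memcpy"
}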
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + if (const Function *F = + dyn_cast_or_null(Mod->getNamedValue(SymName))) { + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F); + return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(), + SC); + } + + // TODO: Remove this when the support for ExternalSymbolSDNode is complete. + if (isValidAIXExternalSymSDNode(SymName)) { + return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT); + } + + report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName)); + } + + // No transformation needed. + assert(Callee.getNode() && "What no callee?"); + return Callee; +} + +static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) { + assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START && + "Expected a CALLSEQ_STARTSDNode."); + + // The last operand is the chain, except when the node has glue. If the node + // has glue, then the last operand is the glue, and the chain is the second + // last operand. + SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1); + if (LastValue.getValueType() != MVT::Glue) + return LastValue; + + return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2); +} + +// Creates the node that moves a functions address into the count register +// to prepare for an indirect call instruction. +static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + const SDLoc &dl) { + SDValue MTCTROps[] = {Chain, Callee, Glue}; + EVT ReturnTypes[] = {MVT::Other, MVT::Glue}; + Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2), + makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2)); + // The glue is the second value produced. + Glue = Chain.getValue(1); +} + +static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + SDValue CallSeqStart, + ImmutableCallSite CS, const SDLoc &dl, + bool hasNest, + const PPCSubtarget &Subtarget) { + // Function pointers in the 64-bit SVR4 ABI do not point to the function + // entry point, but to the function descriptor (the function entry point + // address is part of the function descriptor though). + // The function descriptor is a three doubleword structure with the + // following fields: function entry point, TOC base address and + // environment pointer. + // Thus for a call through a function pointer, the following actions need + // to be performed: + // 1. Save the TOC of the caller in the TOC save area of its stack + // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). + // 2. Load the address of the function entry point from the function + // descriptor. + // 3. Load the TOC of the callee from the function descriptor into r2. + // 4. Load the environment pointer from the function descriptor into + // r11. + // 5. Branch to the function entry point address. + // 6. On return of the callee, the TOC of the caller needs to be + // restored (this is done in FinishCall()). + // + // The loads are scheduled at the beginning of the call sequence, and the + // register copies are flagged together to ensure that no other + // operations can be scheduled in between. E.g. without flagging the + // copies together, a TOC access in the caller could be scheduled between + // the assignment of the callee TOC and the branch to the callee, which leads + // to incorrect code. + + // Start by loading the function address from the descriptor. 
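Concretely, the loads that follow walk a structure of this shape. A hedged sketch of the 64-bit layout (field names are illustrative; the offsets are what descriptorTOCAnchorOffset and descriptorEnvironmentPointerOffset return on 64-bit targets, and 32-bit AIX uses the same shape with 4-byte fields):

struct FunctionDescriptor {   // 64-bit ELFv1 / AIX function descriptor
  void *EntryPoint;           // +0:  code address, moved into CTR
  void *TOCBase;              // +8:  loaded into gpr2
  void *Environment;          // +16: loaded into r11, unless 'nest' takes its place
};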
+ SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); + auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() + ? (MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant) + : MachineMemOperand::MONone; + + MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); + + // Registers used in building the DAG. + const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister(); + const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); + + // Offsets of descriptor members. + const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset(); + const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset(); + + const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; + const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4; + + // One load for the function's entry point address. + SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI, + Alignment, MMOFlags); + + // One for loading the TOC anchor for the module that contains the called + // function. + SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff); + SDValue TOCPtr = + DAG.getLoad(RegVT, dl, LDChain, AddTOC, + MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags); + + // One for loading the environment pointer. + SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff); + SDValue LoadEnvPtr = + DAG.getLoad(RegVT, dl, LDChain, AddPtr, + MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags); + + // Then copy the newly loaded TOC anchor to the TOC pointer. + SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue); + Chain = TOCVal.getValue(0); + Glue = TOCVal.getValue(1); + + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. + assert((!hasNest || !Subtarget.isAIXABI()) && + "Nest parameter is not supported on AIX."); + if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue); + Chain = EnvVal.getValue(0); + Glue = EnvVal.getValue(1); + } + + // The rest of the indirect call sequence is the same as the non-descriptor + // DAG. + prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl); +} + +static void +buildCallOperands(SmallVectorImpl &Ops, CallingConv::ID CallConv, + const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector, 8> &RegsToPass, + SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, + const PPCSubtarget &Subtarget, bool isIndirect) { + const bool IsPPC64 = Subtarget.isPPC64(); + // MVT for a general purpose register. + const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + + // First operand is always the chain. + Ops.push_back(Chain); + + // If it's a direct call, pass the callee as the second operand. + if (!isIndirect) + Ops.push_back(Callee); + else { + assert(!isPatchPoint && "Patch point calls are not indirect."); + + // For the TOC-based ABIs, we have saved the TOC pointer to the linkage area + // on the stack (this would have been done in `LowerCall_64SVR4` or + // `LowerCall_AIX`). The call instruction is a pseudo instruction that + // represents both the indirect branch and a load that restores the TOC + // pointer from the linkage area. The operand for the TOC restore is an add + // of the TOC save offset to the stack pointer. This must be the second + // operand: after the chain input but before any other variadic arguments.
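The TOC save offset used by the restore operand below comes from frame lowering. A minimal sketch of the values it is expected to produce under the standard linkage-area layouts (hedged; PPCFrameLowering::getTOCSaveOffset is the authoritative source):

// Back chain, CR save and LR save precede the TOC slot; ELFv1 and AIX
// additionally reserve two pointer-sized words ahead of it.
unsigned tocSaveOffset(bool IsPPC64, bool IsELFv2) {
  if (!IsPPC64)
    return 20;              // 32-bit AIX: 5 words * 4 bytes
  return IsELFv2 ? 24 : 40; // 64-bit: 3 vs. 5 doublewords
}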
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + + SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); + unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff); + Ops.push_back(AddTOC); + } + + // Add the register used for the environment pointer. + if (Subtarget.usesFunctionDescriptors() && !hasNest) + Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(), + RegVT)); + + + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT)); + } + + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is + // no way to mark dependencies as implicit here. + // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. + if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint) + Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls - if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + if (isVarArg && Subtarget.is32BitELFABI()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); - // When performing tail call optimization the callee pops its arguments off - // the stack. Account for this here so these bytes can be pushed back on in - // PPCFrameLowering::eliminateCallFramePseudoInstr. - int BytesCalleePops = - (CallConv == CallingConv::Fast && - getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = @@ -5236,8 +5416,40 @@ SDValue PPCTargetLowering::FinishCall( assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); - if (InFlag.getNode()) - Ops.push_back(InFlag); + // If the glue is valid, it is the last operand. 
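Taken together, buildCallOperands lays the operands out in the following order; a hedged summary of the code above and the glue push just below (each entry appears only when its condition holds):

// [0] Chain
// [1] Callee (direct call), or the sp + TOC-save-offset add (indirect call
//     on a TOC-based ABI), then the environment-pointer register
//     (descriptor ABIs without 'nest') and CTR/CTR8 (indirect tail call)
// ... SPDiff (tail call), argument registers, TOC register (TOC-based ABIs,
//     non-patchpoint), CR1EQ (32-bit ELF vararg), register mask
// [n] Glue, when present, always last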
+ if (Glue.getNode()) + Ops.push_back(Glue); +} + +SDValue PPCTargetLowering::FinishCall( + CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector, 8> &RegsToPass, SDValue Glue, + SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, + unsigned NumBytes, const SmallVectorImpl &Ins, + SmallVectorImpl &InVals, ImmutableCallSite CS) const { + + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) + setUsesTOCBasePtr(DAG); + + const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint); + unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall, + DAG.getMachineFunction().getFunction(), + Callee, Subtarget, DAG.getTarget()); + + if (!isIndirect) + Callee = transformCallee(Callee, DAG, dl, Subtarget); + else if (Subtarget.usesFunctionDescriptors()) + prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS, + dl, hasNest, Subtarget); + else + prepareIndirectCall(DAG, Callee, Glue, Chain, dl); + + // Build the operand list for the call instruction. + SmallVector Ops; + buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint, + hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff, + Subtarget, isIndirect); // Emit tail call. if (isTailCall) { @@ -5246,81 +5458,32 @@ SDValue PPCTargetLowering::FinishCall( Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa(Callee)) && - "Expecting an global address, external symbol, absolute value or register"); - + "Expecting a global address, external symbol, absolute value or " + "register"); + assert(CallOpc == PPCISD::TC_RETURN && + "Unexpected call opcode for a tail call."); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); - return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); + return DAG.getNode(CallOpc, dl, MVT::Other, Ops); } - // Add a NOP immediately after the branch instruction when using the 64-bit - // SVR4 or the AIX ABI. - // At link time, if caller and callee are in a different module and - // thus have a different TOC, the call will be replaced with a call to a stub - // function which saves the current TOC, loads the TOC of the callee and - // branches to the callee. The NOP will be replaced with a load instruction - // which restores the TOC of the caller from the TOC save slot of the current - // stack frame. If caller and callee belong to the same module (and have the - // same TOC), the NOP will remain unchanged, or become some other NOP. - - MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - if (!isTailCall && !isPatchPoint && - ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || - Subtarget.isAIXABI())) { - if (CallOpc == PPCISD::BCTRL) { - if (Subtarget.isAIXABI()) - report_fatal_error("Indirect call on AIX is not implemented."); - - // This is a call through a function pointer. - // Restore the caller TOC from the save area into R2. - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. - // We are using a target-specific load with r2 hard coded, because the - // result of a target-independent load would never go directly into r2, - // since r2 is a reserved register (which prevents the register allocator - // from allocating it), resulting in an additional register being - // allocated and an unnecessary move instruction being generated. 
- CallOpc = PPCISD::BCTRL_LOAD_TOC; - - SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); - unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); - SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + std::array ReturnTypes = {{MVT::Other, MVT::Glue}}; + Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops); + Glue = Chain.getValue(1); - // The address needs to go after the chain input but before the flag (or - // any other variadic arguments). - Ops.insert(std::next(Ops.begin()), AddTOC); - } else if (CallOpc == PPCISD::CALL && - !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { - // Otherwise insert NOP for non-local calls. - CallOpc = PPCISD::CALL_NOP; - } - } - - if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) { - // On AIX, direct function calls reference the symbol for the function's - // entry point, which is named by inserting a "." before the function's - // C-linkage name. - GlobalAddressSDNode *G = cast(Callee); - auto &Context = DAG.getMachineFunction().getMMI().getContext(); - MCSymbol *S = Context.getOrCreateSymbol(Twine(".") + - Twine(G->getGlobal()->getName())); - Callee = DAG.getMCSymbol(S, PtrVT); - // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode. - Ops[1] = Callee; - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); - InFlag = Chain.getValue(1); + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCFrameLowering::eliminateCallFramePseudoInstr. + int BytesCalleePops = (CallConv == CallingConv::Fast && + getTargetMachine().Options.GuaranteedTailCallOpt) + ? NumBytes + : 0; Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(BytesCalleePops, dl, true), - InFlag, dl); - if (!Ins.empty()) - InFlag = Chain.getValue(1); + Glue, dl); + Glue = Chain.getValue(1); - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue @@ -6273,8 +6436,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. + // See prepareDescriptorIndirectCall and buildCallOperands for more + // information about calls through function pointers in the 64-bit SVR4 ABI. if (!isTailCall && !isPatchPoint && !isFunctionGlobalAddress(Callee) && !isa(Callee)) { @@ -6695,6 +6858,205 @@ SDValue PPCTargetLowering::LowerCall_Darwin( NumBytes, Ins, InVals, CS); } +static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State) { + + if (ValVT == MVT::f128) + report_fatal_error("f128 is unimplemented on AIX."); + + if (ArgFlags.isByVal()) + report_fatal_error("Passing structure by value is unimplemented."); + + if (ArgFlags.isNest()) + report_fatal_error("Nest arguments are unimplemented."); + + if (ValVT.isVector() || LocVT.isVector()) + report_fatal_error("Vector arguments are unimplemented on AIX."); + + const PPCSubtarget &Subtarget = static_cast( + State.getMachineFunction().getSubtarget()); + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 
8 : 4; + + static const MCPhysReg GPR_32[] = {// 32-bit registers. + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10}; + static const MCPhysReg GPR_64[] = {// 64-bit registers. + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10}; + + // Arguments always reserve parameter save area. + switch (ValVT.SimpleTy) { + default: + report_fatal_error("Unhandled value type for argument."); + case MVT::i64: + // i64 arguments should have been split to i32 for PPC32. + assert(IsPPC64 && "PPC32 should have split i64 values."); + LLVM_FALLTHROUGH; + case MVT::i1: + case MVT::i32: + State.AllocateStack(PtrByteSize, PtrByteSize); + if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + // Promote integers if needed. + if (ValVT.getSizeInBits() < RegVT.getSizeInBits()) + LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt + : CCValAssign::LocInfo::ZExt; + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); + } + else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + return false; + + case MVT::f32: + case MVT::f64: { + // Parameter save area (PSA) is reserved even if the float passes in fpr. + const unsigned StoreSize = LocVT.getStoreSize(); + // Floats are always 4-byte aligned in the PSA on AIX. + // This includes f64 in 64-bit mode for ABI compatibility. + State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4); + if (unsigned Reg = State.AllocateReg(FPR)) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + + // AIX requires that GPRs are reserved for float arguments. + // Successfully reserved GPRs are only initialized for vararg calls. + MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + for (unsigned I = 0; I < StoreSize; I += PtrByteSize) { + if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + if (State.isVarArg()) { + // Custom handling is required for: + // f64 in PPC32 needs to be split into 2 GPRs. + // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. + State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); + } + } else if (State.isVarArg()) { + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + } + } + + return false; + } + } + return true; +} + +static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, + bool IsPPC64) { + assert((IsPPC64 || SVT != MVT::i64) && + "i64 should have been split for 32-bit codegen."); + + switch (SVT) { + default: + report_fatal_error("Unexpected value type for formal argument"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + return IsPPC64 ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; + case MVT::f32: + return &PPC::F4RCRegClass; + case MVT::f64: + return &PPC::F8RCRegClass; + } +} + +static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, + SelectionDAG &DAG, SDValue ArgValue, + MVT LocVT, const SDLoc &dl) { + assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); + assert(ValVT.getSizeInBits() < LocVT.getSizeInBits()); + + if (Flags.isSExt()) + ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + else if (Flags.isZExt()) + ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + + return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); +} + +SDValue PPCTargetLowering::LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + + assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || + CallConv == CallingConv::Fast) && + "Unexpected calling convention!"); + + if (isVarArg) + report_fatal_error("This call type is unimplemented on AIX."); + + if (getTargetMachine().Options.GuaranteedTailCallOpt) + report_fatal_error("Tail call support is unimplemented on AIX."); + + if (useSoftFloat()) + report_fatal_error("Soft float support is unimplemented on AIX."); + + const PPCSubtarget &Subtarget = + static_cast(DAG.getSubtarget()); + if (Subtarget.hasQPX()) + report_fatal_error("QPX support is not supported on AIX."); + + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + MachineFunction &MF = DAG.getMachineFunction(); + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); + + // Reserve space for the linkage area on the stack. + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + // On AIX a minimum of 8 words is saved to the parameter save area. + const unsigned MinParameterSaveArea = 8 * PtrByteSize; + CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize); + CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (VA.isRegLoc()) { + EVT ValVT = VA.getValVT(); + MVT LocVT = VA.getLocVT(); + MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy; + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); + ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); + if (ValVT.isScalarInteger() && + (ValVT.getSizeInBits() < LocVT.getSizeInBits())) { + ArgValue = + truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); + } + InVals.push_back(ArgValue); + } else { + report_fatal_error("Handling of formal arguments on the stack is " + "unimplemented!"); + } + } + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so + // that taking the difference between two stack areas will result in an + // aligned stack. 
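Before the alignment bookkeeping below, a worked example of the CC_AIX assignments exercised above (hedged, derived by tracing the allocation logic rather than from ABI documentation):

// void f(int a, double b, int c);   // 64-bit AIX, non-vararg
//   a -> r3   (PSA bytes 0-7 reserved)
//   b -> f1   (PSA bytes 8-15 reserved; r4 shadow-reserved, uninitialized)
//   c -> r5   (PSA bytes 16-23 reserved)
// In a vararg call, b would additionally be mirrored into r4 so the callee
// can dump the GPRs and walk the arguments in memory.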
+ MinReservedArea = + EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setMinReservedArea(MinReservedArea); + + return Chain; +} SDValue PPCTargetLowering::LowerCall_AIX( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, @@ -6705,22 +7067,33 @@ SDValue PPCTargetLowering::LowerCall_AIX( SelectionDAG &DAG, SmallVectorImpl &InVals, ImmutableCallSite CS) const { - assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && - "Unimplemented calling convention!"); - if (isVarArg || isPatchPoint) + assert((CallConv == CallingConv::C || + CallConv == CallingConv::Cold || + CallConv == CallingConv::Fast) && "Unexpected calling convention!"); + + if (isPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - bool isPPC64 = PtrVT == MVT::i64; - unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned NumOps = Outs.size(); + const PPCSubtarget& Subtarget = + static_cast(DAG.getSubtarget()); + if (Subtarget.hasQPX()) + report_fatal_error("QPX is not supported on AIX."); + if (Subtarget.hasAltivec()) + report_fatal_error("Altivec support is unimplemented on AIX."); + MachineFunction &MF = DAG.getMachineFunction(); + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); - // Count how many bytes are to be pushed on the stack, including the linkage - // area, parameter list area. - // On XCOFF, we start with 24/48, which is reserved space for - // [SP][CR][LR][2 x reserved][TOC]. - unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + // Reserve space for the linkage save area (LSA) on the stack. + // In both PPC32 and PPC64 there are 6 reserved slots in the LSA: + // [SP][CR][LR][2 x reserved][TOC]. + // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + CCInfo.AllocateStack(LinkageSize, PtrByteSize); + CCInfo.AnalyzeCallOperands(Outs, CC_AIX); // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if the callee @@ -6728,98 +7101,101 @@ SDValue PPCTargetLowering::LowerCall_AIX( // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - unsigned NumBytes = LinkageSize + 8 * PtrByteSize; + const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize; + const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize; // Adjust the stack pointer for the new arguments... - // These operations are automatically eliminated by the prolog/epilog - // inserter pass. + // These operations are automatically eliminated by the prolog/epilog pass. Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; - static const MCPhysReg GPR_32[] = { // 32-bit registers. - PPC::R3, PPC::R4, PPC::R5, PPC::R6, - PPC::R7, PPC::R8, PPC::R9, PPC::R10 - }; - static const MCPhysReg GPR_64[] = { // 64-bit registers. - PPC::X3, PPC::X4, PPC::X5, PPC::X6, - PPC::X7, PPC::X8, PPC::X9, PPC::X10 - }; - - const unsigned NumGPRs = isPPC64 ? 
array_lengthof(GPR_64) - : array_lengthof(GPR_32); - const unsigned NumFPRs = array_lengthof(FPR); - assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing " - "on AIX"); + SmallVector, 8> RegsToPass; - const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; - unsigned GPR_idx = 0, FPR_idx = 0; + for (unsigned I = 0, E = ArgLocs.size(); I != E;) { + CCValAssign &VA = ArgLocs[I++]; - SmallVector, 8> RegsToPass; + if (VA.isMemLoc()) + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + if (!VA.isRegLoc()) + report_fatal_error( + "Unexpected non-register location for function call argument."); - if (isTailCall) - report_fatal_error("Handling of tail call is unimplemented!"); - int SPDiff = 0; + SDValue Arg = OutVals[VA.getValNo()]; - for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (!VA.needsCustom()) { + switch (VA.getLocInfo()) { + default: + report_fatal_error("Unexpected argument extension type."); + case CCValAssign::Full: + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - // Promote integers if needed. - if (Arg.getValueType() == MVT::i1 || - (isPPC64 && Arg.getValueType() == MVT::i32)) { - unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg); + continue; } - // Note: "by value" is code for passing a structure by value, not - // basic types. - if (Flags.isByVal()) - report_fatal_error("Passing structure by value is unimplemented!"); + // Custom handling is used for GPR initializations for vararg float + // arguments. + assert(isVarArg && VA.getValVT().isFloatingPoint() && + VA.getLocVT().isInteger() && + "Unexpected custom register handling for calling convention."); - switch (Arg.getSimpleValueType().SimpleTy) { - default: llvm_unreachable("Unexpected ValueType for argument!"); - case MVT::i1: - case MVT::i32: - case MVT::i64: - if (GPR_idx != NumGPRs) - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); - else - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); - break; - case MVT::f32: - case MVT::f64: - if (FPR_idx != NumFPRs) { - RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + SDValue ArgAsInt = + DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg); - // If we have any FPRs remaining, we may also have GPRs remaining. - // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available - // GPRs. - if (GPR_idx != NumGPRs) - ++GPR_idx; - if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) - ++GPR_idx; - } else - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); - break; - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - case MVT::v2f64: - case MVT::v2i64: - case MVT::v1i128: - case MVT::f128: - case MVT::v4f64: - case MVT::v4i1: - report_fatal_error("Handling of this parameter type is unimplemented!"); - } - } + if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize()) + // f32 in 32-bit GPR + // f64 in 64-bit GPR + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt)); + else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits()) + // f32 in 64-bit GPR. 
+ RegsToPass.push_back(std::make_pair( + VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT()))); + else { + // f64 in two 32-bit GPRs + // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs. + assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 && + "Unexpected custom register for argument!"); + CCValAssign &GPR1 = VA; + SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt, + DAG.getConstant(32, dl, MVT::i8)); + RegsToPass.push_back(std::make_pair( + GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32))); + assert(I != E && "A second custom GPR is expected!"); + CCValAssign &GPR2 = ArgLocs[I++]; + assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() && + GPR2.needsCustom() && "A second custom GPR is expected!"); + RegsToPass.push_back(std::make_pair( + GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32))); + } + } + + // For indirect calls, we need to save the TOC base to the stack for + // restoration after the call. + if (!isTailCall && !isPatchPoint && + !isFunctionGlobalAddress(Callee) && !isa(Callee)) { + const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister(); + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; + const unsigned TOCSaveOffset = + Subtarget.getFrameLowering()->getTOCSaveOffset(); - if (!isFunctionGlobalAddress(Callee) && - !isa(Callee)) - report_fatal_error("Handling of indirect call is unimplemented!"); + setUsesTOCBasePtr(DAG); + SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); + } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -6829,10 +7205,11 @@ SDValue PPCTargetLowering::LowerCall_AIX( InFlag = Chain.getValue(1); } + const int SPDiff = 0; return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, - /* unused except on PPC64 ELFv1 */ false, DAG, - RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, - NumBytes, Ins, InVals, CS); + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, + InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, + InVals, CS); } bool @@ -7121,8 +7498,7 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { "Custom lowering only for i1 results"); SDLoc DL(Op); - return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1, - Op.getOperand(0)); + return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0)); } SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, @@ -7188,17 +7564,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; + bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath; + bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath; // We might be able to do better than this under some circumstances, but in // general, fsel-based lowering of select is a finite-math-only optimization. // For more information, see section F.3 of the 2.06 ISA specification. 
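A short illustration of why this guard exists (hedged): the fsel lowering turns a comparison into a sign test on a subtraction, which is unsound once NaNs or infinities can appear.

// select(setolt(x, y), tv, fv)  ==>  fsel(x - y, fv, tv)
//   (fsel yields its second operand when the first is >= 0.0, else the third)
// For x = y = +inf: x - y is NaN, NaN is not >= 0.0, so fsel yields tv,
// yet setolt(+inf, +inf) is false and the select should yield fv.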
- if (!DAG.getTarget().Options.NoInfsFPMath || - !DAG.getTarget().Options.NoNaNsFPMath) + // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the + // presence of infinities. + if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs)) return Op; - // TODO: Propagate flags from the select rather than global settings. - SDNodeFlags Flags; - Flags.setNoInfs(true); - Flags.setNoNaNs(true); - ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); @@ -7207,6 +7581,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) { + switch (CC) { + default: + // Not a min/max but with finite math, we may still be able to use fsel. + if (HasNoInfs && HasNoNaNs) + break; + return Op; + case ISD::SETOGT: + case ISD::SETGT: + return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS); + case ISD::SETOLT: + case ISD::SETLT: + return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS); + } + } + + // TODO: Propagate flags from the select rather than global settings. + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); + // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. SDValue Sel1; @@ -8055,8 +8450,6 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { - assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); - static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; @@ -8376,29 +8769,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // We have XXSPLTIB for constant splats one byte wide - if (Subtarget.hasP9Vector() && SplatSize == 1) { - // This is a splat of 1-byte elements with some elements potentially undef. - // Rather than trying to match undef in the SDAG patterns, ensure that all - // elements are the same constant. - if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) { - SmallVector Ops(16, DAG.getConstant(SplatBits, - dl, MVT::i32)); - SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops); - if (Op.getValueType() != MVT::v16i8) - return DAG.getBitcast(Op.getValueType(), NewBV); - return NewBV; - } - - // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll - // detect that constant splats like v8i16: 0xABAB are really just splats - // of a 1-byte constant. In this case, we need to convert the node to a - // splat of v16i8 and a bitcast. - if (Op.getValueType() != MVT::v16i8) - return DAG.getBitcast(Op.getValueType(), - DAG.getConstant(SplatBits, dl, MVT::v16i8)); - - return Op; - } + // FIXME: SplatBits is an unsigned int being cast to an int while passing it + // as an argument to BuildSplatiI. Given SplatSize == 1 it is okay here. + if (Subtarget.hasP9Vector() && SplatSize == 1) + return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl); // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 
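A worked instance of the range test below (hedged arithmetic, matching the SextVal expression that follows): for a v16i8 splat of the byte 0xF0, SplatBits = 0xF0 and SplatBitSize = 8, so

// int32_t(0xF0 << (32 - 8)) >> (32 - 8)
//   = int32_t(0xF0000000) >> 24   (arithmetic shift)
//   = -16
// -16 lies in [-16,15], so a single vspltisb with immediate -16 builds the
// vector.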
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> @@ -8930,19 +9304,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (Subtarget.hasP9Vector()) { if (PPC::isXXBRHShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); - SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); + SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); } else if (PPC::isXXBRWShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); - SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); + SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); } else if (PPC::isXXBRDShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); - SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); + SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); } else if (PPC::isXXBRQShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); - SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); + SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); } } @@ -9503,7 +9877,7 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), Op.getOperand(0)); // XXBRD - Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op); + Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op); // MFVSRD int VectorIndex = 0; if (Subtarget.isLittleEndian()) @@ -10845,9 +11219,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, DebugLoc dl = MI.getDebugLoc(); TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); - } else if (MI.getOpcode() == PPC::SELECT_CC_I4 || - MI.getOpcode() == PPC::SELECT_CC_I8 || - MI.getOpcode() == PPC::SELECT_CC_F4 || + } else if (MI.getOpcode() == PPC::SELECT_CC_F4 || MI.getOpcode() == PPC::SELECT_CC_F8 || MI.getOpcode() == PPC::SELECT_CC_F16 || MI.getOpcode() == PPC::SELECT_CC_QFRC || @@ -10859,8 +11231,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.getOpcode() == PPC::SELECT_CC_VSRC || MI.getOpcode() == PPC::SELECT_CC_SPE4 || MI.getOpcode() == PPC::SELECT_CC_SPE || - MI.getOpcode() == PPC::SELECT_I4 || - MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_F16 || @@ -11397,28 +11767,28 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); - } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || - MI.getOpcode() == PPC::ANDIo_1_GT_BIT || - MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || - MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) { - unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 || - MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) - ? 
PPC::ANDIo8 - : PPC::ANDIo; - bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT || - MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); + } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT || + MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT || + MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 || + MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) { + unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 || + MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) + ? PPC::ANDI8_rec + : PPC::ANDI_rec; + bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT || + MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register Dest = RegInfo.createVirtualRegister( - Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); + Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); - DebugLoc dl = MI.getDebugLoc(); - BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) + DebugLoc Dl = MI.getDebugLoc(); + BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest) .addReg(MI.getOperand(1).getReg()) .addImm(1); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), + BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) - .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); + .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT); } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); @@ -11638,7 +12008,7 @@ unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are two or more FDIVs (for embedded cores with only // one FP pipeline) for three or more FDIVs (for generic OOO cores). - switch (Subtarget.getDarwinDirective()) { + switch (Subtarget.getCPUDirective()) { default: return 3; case PPC::DIR_440: @@ -14111,7 +14481,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { - switch (Subtarget.getDarwinDirective()) { + switch (Subtarget.getCPUDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_PWR4: @@ -14121,7 +14491,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: - case PPC::DIR_PWR9: { + case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: { if (!ML) break; @@ -14309,6 +14680,17 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::VSFRCRegClass); } + // If we name a VSX register, we can't defer to the base class because it + // will not recognize the correct register (their names will be VSL{0-31} + // and V{0-31} so they won't match). So we match them here. + if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { + int VSNum = atoi(Constraint.data() + 3); + assert(VSNum >= 0 && VSNum <= 63 && + "Attempted to access a vsr out of range"); + if (VSNum < 32) + return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); + return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + } std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); @@ -14513,16 +14895,15 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
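A worked example of the VSX constraint matching added above (hedged; the numbers follow directly from the code):

// A constraint written as "{vs35}" arrives with the braces intact, so
// Constraint[1..2] == "vs" and atoi("35}") == 35. Since 35 >= 32 the match
// is PPC::V0 + (35 - 32) == V3 (VS32-VS63 alias the Altivec registers),
// while "{vs7}" resolves to VSL7; both are placed in VSRCRegClass.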
-Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, +Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { bool isPPC64 = Subtarget.isPPC64(); bool IsDarwinABI = Subtarget.isDarwinABI(); - if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || - (!isPPC64 && VT != MVT::i32)) + bool is64Bit = isPPC64 && VT == LLT::scalar(64); + if (!is64Bit && VT != LLT::scalar(32)) report_fatal_error("Invalid register global variable type"); - bool is64Bit = isPPC64 && VT == MVT::i64; Register Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2) @@ -14870,6 +15251,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; + if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess()) + return false; + if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && @@ -14889,7 +15273,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } -bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { +bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) @@ -15278,7 +15663,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { return SDValue(); auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool { - switch (this->Subtarget.getDarwinDirective()) { + switch (this->Subtarget.getCPUDirective()) { default: // TODO: enhance the condition for subtarget before pwr8 return false; @@ -15288,6 +15673,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { // vector 7 2 2 return true; case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: // type mul add shl // scalar 5 2 2 // vector 7 2 2 @@ -15357,12 +15743,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!CI->isTailCall()) return false; - // If tail calls are disabled for the caller then we are done. - const Function *Caller = CI->getParent()->getParent(); - auto Attr = Caller->getFnAttribute("disable-tail-calls"); - if (Attr.getValueAsString() == "true") - return false; - // If sibling calls have been disabled and tail-calls aren't guaranteed // there is no reason to duplicate. auto &TM = getTargetMachine(); @@ -15375,6 +15755,7 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return false; // Make sure the callee and caller calling conventions are eligible for tco. + const Function *Caller = CI->getParent()->getParent(); if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(), CI->getCallingConv())) return false; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 62922ea2d4c4..e0c381827b87 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -43,460 +43,475 @@ namespace llvm { // that come before it. For example, ADD or MUL should be placed before // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come // after it. - enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// FSEL - Traditional three-operand fsel node. 
- /// - FSEL, - - /// FCFID - The FCFID instruction, taking an f64 operand and producing - /// and f64 value containing the FP representation of the integer that - /// was temporarily in the f64 operand. - FCFID, - - /// Newer FCFID[US] integer-to-floating-point conversion instructions for - /// unsigned integers and single-precision outputs. - FCFIDU, FCFIDS, FCFIDUS, - - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 - /// operand, producing an f64 value containing the integer representation - /// of that FP value. - FCTIDZ, FCTIWZ, - - /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for - /// unsigned integers with round toward zero. - FCTIDUZ, FCTIWUZ, - - /// Floating-point-to-interger conversion instructions - FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR, - - /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in - /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. - VEXTS, - - /// SExtVElems, takes an input vector of a smaller type and sign - /// extends to an output vector of a larger type. - SExtVElems, - - /// Reciprocal estimate instructions (unary FP ops). - FRE, FRSQRTE, - - // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking - // three v4f32 operands and producing a v4f32 result. - VMADDFP, VNMSUBFP, - - /// VPERM - The PPC VPERM Instruction. - /// - VPERM, - - /// XXSPLT - The PPC VSX splat instructions - /// - XXSPLT, - - /// VECINSERT - The PPC vector insert instruction - /// - VECINSERT, - - /// XXREVERSE - The PPC VSX reverse instruction - /// - XXREVERSE, - - /// VECSHL - The PPC vector shift left instruction - /// - VECSHL, - - /// XXPERMDI - The PPC XXPERMDI instruction - /// - XXPERMDI, - - /// The CMPB instruction (takes two operands of i32 or i64). - CMPB, - - /// Hi/Lo - These represent the high and low 16-bit parts of a global - /// address respectively. These nodes have two operands, the first of - /// which must be a TargetGlobalAddress, and the second of which must be a - /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', - /// though these are usually folded into other nodes. - Hi, Lo, - - /// The following two target-specific nodes are used for calls through - /// function pointers in the 64-bit SVR4 ABI. - - /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an allocation on the stack. - DYNALLOC, - - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an offset from native SP to the address of the most recent - /// dynamic alloca. - DYNAREAOFFSET, - - /// GlobalBaseReg - On Darwin, this node represents the result of the mflr - /// at function entry, used for PIC code. - GlobalBaseReg, - - /// These nodes represent PPC shifts. - /// - /// For scalar types, only the last `n + 1` bits of the shift amounts - /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. - /// for exact behaviors. - /// - /// For vector types, only the last n bits are used. See vsld. - SRL, SRA, SHL, - - /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign - /// word and shift left immediate. - EXTSWSLI, - - /// The combination of sra[wd]i and addze used to implemented signed - /// integer division by a power of 2. The first operand is the dividend, - /// and the second is the constant shift amount (representing the - /// divisor). - SRA_ADDZE, - - /// CALL - A direct function call. 
-      /// CALL_NOP is a call with the special NOP which follows 64-bit
-      /// SVR4 calls and 32-bit/64-bit AIX calls.
-      CALL, CALL_NOP,
-
-      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
-      /// MTCTR instruction.
-      MTCTR,
-
-      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
-      /// BCTRL instruction.
-      BCTRL,
-
-      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
-      /// instruction and the TOC reload required on SVR4 PPC64.
-      BCTRL_LOAD_TOC,
-
-      /// Return with a flag operand, matched by 'blr'
-      RET_FLAG,
-
-      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
-      /// This copies the bits corresponding to the specified CRREG into the
-      /// resultant GPR.  Bits corresponding to other CR regs are undefined.
-      MFOCRF,
-
-      /// Direct move from a VSX register to a GPR
-      MFVSR,
-
-      /// Direct move from a GPR to a VSX register (algebraic)
-      MTVSRA,
-
-      /// Direct move from a GPR to a VSX register (zero)
-      MTVSRZ,
-
-      /// Direct move of 2 consecutive GPR to a VSX register.
-      BUILD_FP128,
-
-      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
-      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
-      /// unsupported for this target.
-      /// Merge 2 GPRs to a single SPE register.
-      BUILD_SPE64,
-
-      /// Extract SPE register component, second argument is high or low.
-      EXTRACT_SPE,
-
-      /// Extract a subvector from signed integer vector and convert to FP.
-      /// It is primarily used to convert a (widened) illegal integer vector
-      /// type to a legal floating point vector type.
-      /// For example v2i32 -> widened to v4i32 -> v2f64
-      SINT_VEC_TO_FP,
-
-      /// Extract a subvector from unsigned integer vector and convert to FP.
-      /// As with SINT_VEC_TO_FP, used for converting illegal types.
-      UINT_VEC_TO_FP,
-
-      // FIXME: Remove these once the ANDI glue bug is fixed:
-      /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
-      /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
-      /// implement truncation of i32 or i64 to i1.
-      ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
-
-      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
-      // target (returns (Lo, Hi)). It takes a chain operand.
-      READ_TIME_BASE,
-
-      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
-      EH_SJLJ_SETJMP,
-
-      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
-      EH_SJLJ_LONGJMP,
-
-      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
-      /// instructions.  For lack of better number, we use the opcode number
-      /// encoding for the OPC field to identify the compare.  For example, 838
-      /// is VCMPGTSH.
-      VCMP,
-
-      /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
-      /// altivec VCMP*o instructions.  For lack of better number, we use the
-      /// opcode number encoding for the OPC field to identify the compare.  For
-      /// example, 838 is VCMPGTSH.
-      VCMPo,
-
-      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
-      /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
-      /// condition register to branch on, OPC is the branch opcode to use (e.g.
-      /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
-      /// an optional input flag argument.
-      COND_BRANCH,
-
-      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
-      /// loops.
-      BDNZ, BDZ,
-
-      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
-      /// towards zero.  Used only as part of the long double-to-int
-      /// conversion sequence.
-      FADDRTZ,
-
-      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
-      MFFS,
-
-      /// TC_RETURN - A tail call return.
-      ///   operand #0 chain
-      ///   operand #1 callee (register or absolute)
-      ///   operand #2 stack adjustment
-      ///   operand #3 optional in flag
-      TC_RETURN,
-
-      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
-      CR6SET,
-      CR6UNSET,
-
-      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
-      /// for non-position independent code on PPC32.
-      PPC32_GOT,
-
-      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
-      /// local dynamic TLS and position indendepent code on PPC32.
-      PPC32_PICGOT,
-
-      /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
-      /// TLS model, produces an ADDIS8 instruction that adds the GOT
-      /// base to sym\@got\@tprel\@ha.
-      ADDIS_GOT_TPREL_HA,
-
-      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
-      /// TLS model, produces a LD instruction with base register G8RReg
-      /// and offset sym\@got\@tprel\@l.  This completes the addition that
-      /// finds the offset of "sym" relative to the thread pointer.
-      LD_GOT_TPREL_L,
-
-      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
-      /// model, produces an ADD instruction that adds the contents of
-      /// G8RReg to the thread pointer.  Symbol contains a relocation
-      /// sym\@tls which is to be replaced by the thread pointer and
-      /// identifies to the linker that the instruction is part of a
-      /// TLS sequence.
-      ADD_TLS,
-
-      /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
-      /// model, produces an ADDIS8 instruction that adds the GOT base
-      /// register to sym\@got\@tlsgd\@ha.
-      ADDIS_TLSGD_HA,
-
-      /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
-      /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
-      /// ADDIS_TLSGD_L_ADDR until after register assignment.
-      ADDI_TLSGD_L,
-
-      /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
-      /// ADDIS_TLSGD_L_ADDR until after register assignment.
-      GET_TLS_ADDR,
-
-      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
-      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
-      /// register assignment.
-      ADDI_TLSGD_L_ADDR,
-
-      /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
-      /// model, produces an ADDIS8 instruction that adds the GOT base
-      /// register to sym\@got\@tlsld\@ha.
-      ADDIS_TLSLD_HA,
-
-      /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
-      /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
-      /// ADDIS_TLSLD_L_ADDR until after register assignment.
-      ADDI_TLSLD_L,
-
-      /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
-      /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
-      /// ADDIS_TLSLD_L_ADDR until after register assignment.
-      GET_TLSLD_ADDR,
-
-      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
-      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
-      /// following register assignment.
-      ADDI_TLSLD_L_ADDR,
-
-      /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
-      /// model, produces an ADDIS8 instruction that adds X3 to
-      /// sym\@dtprel\@ha.
-      ADDIS_DTPREL_HA,
-
-      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
-      /// model, produces an ADDI8 instruction that adds G8RReg to
-      /// sym\@got\@dtprel\@l.
-      ADDI_DTPREL_L,
-
-      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
-      /// during instruction selection to optimize a BUILD_VECTOR into
-      /// operations on splats.  This is necessary to avoid losing these
-      /// optimizations due to constant folding.
-      VADD_SPLAT,
-
-      /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
-      /// operand identifies the operating system entry point.
-      SC,
-
-      /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
-      CLRBHRB,
-
-      /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
-      /// history rolling buffer entry.
-      MFBHRBE,
-
-      /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
-      RFEBB,
-
-      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
-      /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
-      /// or stxvd2x instruction.  The chain is necessary because the
-      /// sequence replaces a load and needs to provide the same number
-      /// of outputs.
-      XXSWAPD,
-
-      /// An SDNode for swaps that are not associated with any loads/stores
-      /// and thereby have no chain.
-      SWAP_NO_CHAIN,
-
-      /// An SDNode for Power9 vector absolute value difference.
-      /// operand #0 vector
-      /// operand #1 vector
-      /// operand #2 constant i32 0 or 1, to indicate whether needs to patch
-      /// the most significant bit for signed i32
-      ///
-      /// Power9 VABSD* instructions are designed to support unsigned integer
-      /// vectors (byte/halfword/word), if we want to make use of them for signed
-      /// integer vectors, we have to flip their sign bits first. To flip sign bit
-      /// for byte/halfword integer vector would become inefficient, but for word
-      /// integer vector, we can leverage XVNEGSP to make it efficiently. eg:
-      /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
-      ///               => VABSDUW((XVNEGSP a), (XVNEGSP b))
-      VABSD,
-
-      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
-      QVFPERM,
-
-      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
-      QVGPCI,
-
-      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
-      QVALIGNI,
-
-      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
-      QVESPLATI,
-
-      /// QBFLT = Access the underlying QPX floating-point boolean
-      /// representation.
-      QBFLT,
-
-      /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
-      /// lower (IDX=1) half of v4f32 to v2f64.
-      FP_EXTEND_HALF,
-
-      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
-      /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
-      /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
-      /// i32.
-      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
-      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
-      /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
-      /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
-      /// or i32.
-      LBRX,
-
-      /// STFIWX - The STFIWX instruction.  The first operand is an input token
-      /// chain, then an f64 value to store, then an address to store it to.
-      STFIWX,
-
-      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
-      /// load which sign-extends from a 32-bit integer value into the
-      /// destination 64-bit register.
-      LFIWAX,
-
-      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
-      /// load which zero-extends from a 32-bit integer value into the
-      /// destination 64-bit register.
-      LFIWZX,
-
-      /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
-      /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
-      /// This can be used for converting loaded integers to floating point.
-      LXSIZX,
-
-      /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
-      /// chain, then an f64 value to store, then an address to store it to,
-      /// followed by a byte-width for the store.
-      STXSIX,
-
-      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
-      /// Maps directly to an lxvd2x instruction that will be followed by
-      /// an xxswapd.
-      LXVD2X,
-
-      /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
-      /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
-      /// the vector type to load vector in big-endian element order.
-      LOAD_VEC_BE,
-
-      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
-      /// v2f32 value into the lower half of a VSR register.
-      LD_VSX_LH,
-
-      /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
-      /// instructions such as LXVDSX, LXVWSX.
-      LD_SPLAT,
-
-      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
-      /// Maps directly to an stxvd2x instruction that will be preceded by
-      /// an xxswapd.
-      STXVD2X,
-
-      /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
-      /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
-      /// the vector type to store vector in big-endian element order.
-      STORE_VEC_BE,
-
-      /// Store scalar integers from VSR.
-      ST_VSR_SCAL_INT,
-
-      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
-      /// The 4xf32 load used for v4i1 constants.
-      QVLFSb,
-
-      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
-      /// except they ensure that the compare input is zero-extended for
-      /// sub-word versions because the atomic loads zero-extend.
-      ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
-
-      /// GPRC = TOC_ENTRY GA, TOC
-      /// Loads the entry for GA from the TOC, where the TOC base is given by
-      /// the last operand.
-      TOC_ENTRY
-    };
+    enum NodeType : unsigned {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      /// FSEL - Traditional three-operand fsel node.
+      ///
+      FSEL,
+
+      /// XSMAXCDP, XSMINCDP - C-type min/max instructions.
+      XSMAXCDP,
+      XSMINCDP,
+
+      /// FCFID - The FCFID instruction, taking an f64 operand and producing
+      /// an f64 value containing the FP representation of the integer that
+      /// was temporarily in the f64 operand.
+      FCFID,
+
+      /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+      /// unsigned integers and single-precision outputs.
+      FCFIDU,
+      FCFIDS,
+      FCFIDUS,
+
+      /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+      /// operand, producing an f64 value containing the integer representation
+      /// of that FP value.
+      FCTIDZ,
+      FCTIWZ,
+
+      /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions
+      /// for unsigned integers with round toward zero.
+      FCTIDUZ,
+      FCTIWUZ,
+
+      /// Floating-point-to-integer conversion instructions
+      FP_TO_UINT_IN_VSR,
+      FP_TO_SINT_IN_VSR,
+
+      /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
+      /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
+      VEXTS,
+
+      /// SExtVElems, takes an input vector of a smaller type and sign
+      /// extends to an output vector of a larger type.
+      SExtVElems,
+
+      /// Reciprocal estimate instructions (unary FP ops).
+      FRE,
+      FRSQRTE,
+
+      // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+      // three v4f32 operands and producing a v4f32 result.
+      VMADDFP,
+      VNMSUBFP,
+
+      /// VPERM - The PPC VPERM Instruction.
+      ///
+      VPERM,
+
+      /// XXSPLT - The PPC VSX splat instructions
+      ///
+      XXSPLT,
+
+      /// VECINSERT - The PPC vector insert instruction
+      ///
+      VECINSERT,
+
+      /// VECSHL - The PPC vector shift left instruction
+      ///
+      VECSHL,
+
+      /// XXPERMDI - The PPC XXPERMDI instruction
+      ///
+      XXPERMDI,
+
+      /// The CMPB instruction (takes two operands of i32 or i64).
+      CMPB,
+
+      /// Hi/Lo - These represent the high and low 16-bit parts of a global
+      /// address respectively.  These nodes have two operands, the first of
+      /// which must be a TargetGlobalAddress, and the second of which must be a
+      /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
+      /// though these are usually folded into other nodes.
+      Hi,
+      Lo,
+
+      /// The following two target-specific nodes are used for calls through
+      /// function pointers in the 64-bit SVR4 ABI.
+
+      /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+      /// compute an allocation on the stack.
+      DYNALLOC,
+
+      /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+      /// compute an offset from native SP to the address of the most recent
+      /// dynamic alloca.
+      DYNAREAOFFSET,
+
+      /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+      /// at function entry, used for PIC code.
+      GlobalBaseReg,
+
+      /// These nodes represent PPC shifts.
+      ///
+      /// For scalar types, only the last `n + 1` bits of the shift amounts
+      /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+      /// for exact behaviors.
+      ///
+      /// For vector types, only the last n bits are used. See vsld.
+      SRL,
+      SRA,
+      SHL,
+
+      /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+      /// word and shift left immediate.
+      EXTSWSLI,
+
+      /// The combination of sra[wd]i and addze used to implement signed
+      /// integer division by a power of 2. The first operand is the dividend,
+      /// and the second is the constant shift amount (representing the
+      /// divisor).
+      SRA_ADDZE,
+
+      /// CALL - A direct function call.
+      /// CALL_NOP is a call with the special NOP which follows 64-bit
+      /// SVR4 calls and 32-bit/64-bit AIX calls.
+      CALL,
+      CALL_NOP,
+
+      /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+      /// MTCTR instruction.
+      MTCTR,
+
+      /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+      /// BCTRL instruction.
+      BCTRL,
+
+      /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
+      /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
+      /// and 64-bit AIX.
+      BCTRL_LOAD_TOC,
+
+      /// Return with a flag operand, matched by 'blr'
+      RET_FLAG,
+
+      /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+      /// This copies the bits corresponding to the specified CRREG into the
+      /// resultant GPR.  Bits corresponding to other CR regs are undefined.
+      MFOCRF,
+
+      /// Direct move from a VSX register to a GPR
+      MFVSR,
+
+      /// Direct move from a GPR to a VSX register (algebraic)
+      MTVSRA,
+
+      /// Direct move from a GPR to a VSX register (zero)
+      MTVSRZ,
+
+      /// Direct move of 2 consecutive GPRs to a VSX register.
+      BUILD_FP128,
+
+      /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+      /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+      /// unsupported for this target.
+      /// Merge 2 GPRs to a single SPE register.
+      BUILD_SPE64,
+
+      /// Extract SPE register component, second argument is high or low.
+      EXTRACT_SPE,
+
+      /// Extract a subvector from signed integer vector and convert to FP.
+      /// It is primarily used to convert a (widened) illegal integer vector
+      /// type to a legal floating point vector type.
+      /// For example v2i32 -> widened to v4i32 -> v2f64
+      SINT_VEC_TO_FP,
+
+      /// Extract a subvector from unsigned integer vector and convert to FP.
+      /// As with SINT_VEC_TO_FP, used for converting illegal types.
+      UINT_VEC_TO_FP,
+
+      // FIXME: Remove these once the ANDI glue bug is fixed:
+      /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of
+      /// the eq or gt bit of CR0 after executing andi. x, 1. This is used to
+      /// implement truncation of i32 or i64 to i1.
+      ANDI_rec_1_EQ_BIT,
+      ANDI_rec_1_GT_BIT,
+
+      // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
+      // target (returns (Lo, Hi)). It takes a chain operand.
+      READ_TIME_BASE,
+
+      // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+      EH_SJLJ_SETJMP,
+
+      // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+      EH_SJLJ_LONGJMP,
+
+      /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+      /// instructions.  For lack of better number, we use the opcode number
+      /// encoding for the OPC field to identify the compare.  For example, 838
+      /// is VCMPGTSH.
+      VCMP,
+
+      /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+      /// altivec VCMP*o instructions.  For lack of better number, we use the
+      /// opcode number encoding for the OPC field to identify the compare.  For
+      /// example, 838 is VCMPGTSH.
+      VCMPo,
+
+      /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+      /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
+      /// condition register to branch on, OPC is the branch opcode to use (e.g.
+      /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+      /// an optional input flag argument.
+      COND_BRANCH,
+
+      /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+      /// loops.
+      BDNZ,
+      BDZ,
+
+      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+      /// towards zero.  Used only as part of the long double-to-int
+      /// conversion sequence.
+      FADDRTZ,
+
+      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+      MFFS,
+
+      /// TC_RETURN - A tail call return.
+      ///   operand #0 chain
+      ///   operand #1 callee (register or absolute)
+      ///   operand #2 stack adjustment
+      ///   operand #3 optional in flag
+      TC_RETURN,
+
+      /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+      CR6SET,
+      CR6UNSET,
+
+      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+      /// for non-position independent code on PPC32.
+      PPC32_GOT,
+
+      /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+      /// local dynamic TLS and position independent code on PPC32.
+      PPC32_PICGOT,
+
+      /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
+      /// TLS model, produces an ADDIS8 instruction that adds the GOT
+      /// base to sym\@got\@tprel\@ha.
+      ADDIS_GOT_TPREL_HA,
+
+      /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+      /// TLS model, produces a LD instruction with base register G8RReg
+      /// and offset sym\@got\@tprel\@l.  This completes the addition that
+      /// finds the offset of "sym" relative to the thread pointer.
+      LD_GOT_TPREL_L,
+
+      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+      /// model, produces an ADD instruction that adds the contents of
+      /// G8RReg to the thread pointer.  Symbol contains a relocation
+      /// sym\@tls which is to be replaced by the thread pointer and
+      /// identifies to the linker that the instruction is part of a
+      /// TLS sequence.
+      ADD_TLS,
+
+      /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
+      /// model, produces an ADDIS8 instruction that adds the GOT base
+      /// register to sym\@got\@tlsgd\@ha.
+      ADDIS_TLSGD_HA,
+
+      /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+      /// model, produces an ADDI8 instruction that adds G8RReg to
+      /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
+      /// ADDIS_TLSGD_L_ADDR until after register assignment.
+      ADDI_TLSGD_L,
+
+      /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
+      /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
+      /// ADDIS_TLSGD_L_ADDR until after register assignment.
+      GET_TLS_ADDR,
+
+      /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
+      /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
+      /// register assignment.
+      ADDI_TLSGD_L_ADDR,
+
+      /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDIS8 instruction that adds the GOT base
+      /// register to sym\@got\@tlsld\@ha.
+      ADDIS_TLSLD_HA,
+
+      /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDI8 instruction that adds G8RReg to
+      /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
+      /// ADDIS_TLSLD_L_ADDR until after register assignment.
+      ADDI_TLSLD_L,
+
+      /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
+      /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
+      /// ADDIS_TLSLD_L_ADDR until after register assignment.
+      GET_TLSLD_ADDR,
+
+      /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
+      /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
+      /// following register assignment.
+      ADDI_TLSLD_L_ADDR,
+
+      /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDIS8 instruction that adds X3 to
+      /// sym\@dtprel\@ha.
+      ADDIS_DTPREL_HA,
+
+      /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+      /// model, produces an ADDI8 instruction that adds G8RReg to
+      /// sym\@got\@dtprel\@l.
+      ADDI_DTPREL_L,
+
+      /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+      /// during instruction selection to optimize a BUILD_VECTOR into
+      /// operations on splats.  This is necessary to avoid losing these
+      /// optimizations due to constant folding.
+      VADD_SPLAT,
+
+      /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
+      /// operand identifies the operating system entry point.
+      SC,
+
+      /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
+      CLRBHRB,
+
+      /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
+      /// history rolling buffer entry.
+      MFBHRBE,
+
+      /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
+      RFEBB,
+
+      /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
+      /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
+      /// or stxvd2x instruction.  The chain is necessary because the
+      /// sequence replaces a load and needs to provide the same number
+      /// of outputs.
+      XXSWAPD,
+
+      /// An SDNode for swaps that are not associated with any loads/stores
+      /// and thereby have no chain.
+      SWAP_NO_CHAIN,
+
+      /// An SDNode for Power9 vector absolute value difference.
+      /// operand #0 vector
+      /// operand #1 vector
+      /// operand #2 constant i32 0 or 1, to indicate whether it needs to patch
+      /// the most significant bit for signed i32
+      ///
+      /// Power9 VABSD* instructions are designed to support unsigned integer
+      /// vectors (byte/halfword/word). To use them for signed integer vectors,
+      /// we have to flip their sign bits first. Flipping the sign bit of a
+      /// byte/halfword integer vector would be inefficient, but for a word
+      /// integer vector we can leverage XVNEGSP to do it efficiently. eg:
+      /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
+      ///               => VABSDUW((XVNEGSP a), (XVNEGSP b))
+      VABSD,
+
+      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+      QVFPERM,
+
+      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+      QVGPCI,
+
+      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+      QVALIGNI,
+
+      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+      QVESPLATI,
+
+      /// QBFLT = Access the underlying QPX floating-point boolean
+      /// representation.
+      QBFLT,
+
+      /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
+      /// lower (IDX=1) half of v4f32 to v2f64.
+      FP_EXTEND_HALF,
+
+      /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+      /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
+      /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
+      /// i32.
+      STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+      /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+      /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
+      /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
+      /// or i32.
+      LBRX,
+
+      /// STFIWX - The STFIWX instruction.  The first operand is an input token
+      /// chain, then an f64 value to store, then an address to store it to.
+      STFIWX,
+
+      /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+      /// load which sign-extends from a 32-bit integer value into the
+      /// destination 64-bit register.
+      LFIWAX,
+
+      /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+      /// load which zero-extends from a 32-bit integer value into the
+      /// destination 64-bit register.
+      LFIWZX,
+
+      /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
+      /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
+      /// This can be used for converting loaded integers to floating point.
+      LXSIZX,
+
+      /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
+      /// chain, then an f64 value to store, then an address to store it to,
+      /// followed by a byte-width for the store.
+      STXSIX,
+
+      /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
+      /// Maps directly to an lxvd2x instruction that will be followed by
+      /// an xxswapd.
+      LXVD2X,
+
+      /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
+      /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
+      /// the vector type to load vector in big-endian element order.
+      LOAD_VEC_BE,
+
+      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+      /// v2f32 value into the lower half of a VSR register.
+      LD_VSX_LH,
+
+      /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory
+      /// instruction, such as LXVDSX, LXVWSX.
+      LD_SPLAT,
+
+      /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
+      /// Maps directly to an stxvd2x instruction that will be preceded by
+      /// an xxswapd.
+      STXVD2X,
+
+      /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
+      /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
+      /// the vector type to store vector in big-endian element order.
+      STORE_VEC_BE,
+
+      /// Store scalar integers from VSR.
+      ST_VSR_SCAL_INT,
+
+      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+      /// The 4xf32 load used for v4i1 constants.
+      QVLFSb,
+
+      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+      /// except they ensure that the compare input is zero-extended for
+      /// sub-word versions because the atomic loads zero-extend.
+      ATOMIC_CMP_SWAP_8,
+      ATOMIC_CMP_SWAP_16,
+
+      /// GPRC = TOC_ENTRY GA, TOC
+      /// Loads the entry for GA from the TOC, where the TOC base is given by
+      /// the last operand.
+      TOC_ENTRY
+    };
 
 } // end namespace PPCISD
 
@@ -647,6 +662,10 @@ namespace llvm {
       return true;
     }
 
+    bool isEqualityCmpFoldedWithSignedCmp() const override {
+      return false;
+    }
+
     bool hasAndNotCompare(SDValue) const override {
       return true;
     }
@@ -733,7 +752,7 @@ namespace llvm {
     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                           SmallVectorImpl<SDNode *> &Created) const override;
 
-    Register getRegisterByName(const char* RegName, EVT VT,
+    Register getRegisterByName(const char* RegName, LLT VT,
                                const MachineFunction &MF) const override;
 
     void computeKnownBitsForTargetNode(const SDValue Op,
@@ -900,7 +919,8 @@ namespace llvm {
    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
    /// expanded to FMAs when this method returns true, otherwise fmuladd is
    /// expanded to fmul + fadd.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
 
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
@@ -1113,6 +1133,10 @@ namespace llvm {
                                 SelectionDAG &DAG, SDValue ArgVal,
                                 const SDLoc &dl) const;
 
+    SDValue LowerFormalArguments_AIX(
+        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+        const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+        SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
     SDValue LowerFormalArguments_Darwin(
         SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
         const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index f16187149d36..43431a1e0069 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -71,13 +71,14 @@ def SRL64 : SDNodeXForm<imm, [{
               Requires<[In64BitMode]>;
 
 let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
-  def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
-              []>,
-      Requires<[In64BitMode]>;
+  let isPredicable = 1 in
+  def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+              []>,
+      Requires<[In64BitMode]>;
 
   def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
                             "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
                             []>,
@@ -141,9 +142,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
                      [(PPCcall_nop (i64 imm:$func))]>;
   }
   let Uses = [CTR8, RM] in {
-    def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
-                 "bctrl", IIC_BrB, [(PPCbctrl)]>,
-                 Requires<[In64BitMode]>;
+    let isPredicable = 1 in
+    def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+                 "bctrl", IIC_BrB, [(PPCbctrl)]>,
+                 Requires<[In64BitMode]>;
 
     let isCodeGenOnly = 1 in {
       def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
@@ -253,7 +255,7 @@ def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
 // Instruction to support lock versions of atomics
 // (EH=1 - see Power ISA 2.07 Book II 4.4.2)
 def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
-                     "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+                     "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isRecordForm;
 
 let hasExtraDefRegAllocReq = 1 in
 def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
@@ -263,7 +265,7 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
 
 let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
 def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
-                          "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
+                          "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isRecordForm;
 
 let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
 def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
@@ -410,6 +412,7 @@ def DYNALLOC8 : PPCEmitTimePseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri
 def DYNAREAOFFSET8 : PPCEmitTimePseudo<(outs i64imm:$result), (ins memri:$fpsi),
                        "#DYNAREAOFFSET8",
                        [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
 
+let hasSideEffects = 0 in {
 let Defs = [LR8] in {
 def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
             "mtlr $rS", IIC_SprMTSPR>,
@@ -421,6 +424,7 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 } // Interpretation64Bit
+}
 
 //===----------------------------------------------------------------------===//
 // Fixed point instructions.
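[A note on the isFMAFasterThanFMulAndFAdd change above: the hook now receives the MachineFunction, so the answer can depend on the subtarget the function is compiled for rather than on the value type alone. The diff only shows the declaration; as a rough, hedged illustration (not the body from this patch), an override with the new signature could consult the subtarget like this:

    // Minimal sketch in C++. The subtarget queries (hasFPU/hasVSX) are
    // illustrative assumptions about a plausible policy; the actual PPC
    // implementation is not part of this hunk.
    bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                       EVT VT) const {
      const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
      if (!VT.isSimple())
        return false;
      switch (VT.getSimpleVT().SimpleTy) {
      case MVT::f32:
      case MVT::f64:
        return Subtarget.hasFPU(); // scalar fmadd/fmsub need a hardware FPU
      case MVT::v4f32:
      case MVT::v2f64:
        return Subtarget.hasVSX(); // vector FMAs come from VSX
      default:
        return false;
      }
    }

Per the doc comment in the header, returning true here makes fmuladd intrinsics expand to FMAs; returning false expands them to fmul + fadd.]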
@@ -474,14 +478,14 @@ defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
 
 // Logical ops with immediate.
 let Defs = [CR0] in {
-def ANDIo8  : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+def ANDI8_rec : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
                      "andi. $dst, $src1, $src2", IIC_IntGeneral,
                      [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
-                     isDOT;
-def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+                     isRecordForm;
+def ANDIS8_rec : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
                      "andis. $dst, $src1, $src2", IIC_IntGeneral,
                      [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
-                     isDOT;
+                     isRecordForm;
 }
 def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
                    "ori $dst, $src1, $src2", IIC_IntSimple,
@@ -497,9 +501,9 @@ def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
                      [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
 
 let isCommutable = 1 in
-defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                      "add", "$rT, $rA, $rB", IIC_IntSimple,
-                      [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+defm ADD8 : XOForm_1rx<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "add", "$rT, $rA, $rB", IIC_IntSimple,
+                       [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
 // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
 // initial-exec thread-local storage model. We need to forbid r0 here -
 // while it works for add just fine, the linker can relax this to local-exec
@@ -576,9 +580,9 @@ defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                          "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
                          [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
                          PPC970_DGroup_Cracked;
-defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                       "subf", "$rT, $rA, $rB", IIC_IntGeneral,
-                       [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm SUBF8 : XOForm_1rx<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                        "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+                        [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
 defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
                       "neg", "$rT, $rA", IIC_IntSimple,
                       [(set i64:$rT, (ineg i64:$rA))]>;
@@ -777,10 +781,10 @@ defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
 defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                          "divdu", "$rT, $rA, $rB", IIC_IntDivD,
                          [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
-def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                     "divde $rT, $rA, $rB", IIC_IntDivD,
-                     [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
-                     isPPC64, Requires<[HasExtDiv]>;
+defm DIVDE : XOForm_1rcr<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                         "divde", "$rT, $rA, $rB", IIC_IntDivD,
+                         [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+                         isPPC64, Requires<[HasExtDiv]>;
 
 let Predicates = [IsISA3_0] in {
 def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
@@ -815,24 +819,14 @@ def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                     [(set i64:$rT, (urem i64:$rA, i64:$rB))]>;
 }
 
-let Defs = [CR0] in
-def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                      "divde. $rT, $rA, $rB", IIC_IntDivD,
-                      []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
-                      isPPC64, Requires<[HasExtDiv]>;
-def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                      "divdeu $rT, $rA, $rB", IIC_IntDivD,
-                      [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
-                      isPPC64, Requires<[HasExtDiv]>;
-let Defs = [CR0] in
-def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                       "divdeu. $rT, $rA, $rB", IIC_IntDivD,
-                       []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
-                       isPPC64, Requires<[HasExtDiv]>;
+defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                          "divdeu", "$rT, $rA, $rB", IIC_IntDivD,
+                          [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+                          isPPC64, Requires<[HasExtDiv]>;
 let isCommutable = 1 in
-defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
-                       "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
-                       [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+defm MULLD : XOForm_1rx<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                        "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
+                        [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in
 def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
                      "mulli $rD, $rA, $imm", IIC_IntMulLI,
@@ -943,7 +937,7 @@ def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
                          [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
                          PPC970_DGroup_Cracked;
 // For fast-isel:
-let isCodeGenOnly = 1, mayLoad = 1 in {
+let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
 def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
                       "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
                       PPC970_DGroup_Cracked;
@@ -1469,7 +1463,7 @@ class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
 def CP_COPY8   : X_L1_RA5_RB5<31, 774, "copy"  , g8rc, IIC_LdStCOPY, []>;
 def CP_PASTE8  : X_L1_RA5_RB5<31, 902, "paste" , g8rc, IIC_LdStPASTE, []>;
-def CP_PASTE8o : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isDOT;
+def CP_PASTE8_rec : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isRecordForm;
 }
 
 // SLB Invalidate Entry Global
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index fd3fc2af2327..f94816a35f79 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -261,6 +261,11 @@ def vecspltisw : PatLeaf<(build_vector), [{
   return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != nullptr;
 }], VSPLTISW_get_imm>;
 
+def immEQOneV : PatLeaf<(build_vector), [{
+  if (ConstantSDNode *C = cast<BuildVectorSDNode>(N)->getConstantSplatNode())
+    return C->isOne();
+  return false;
+}]>;
 //===----------------------------------------------------------------------===//
 // Helpers for defining instructions that directly correspond to intrinsics.
@@ -518,19 +523,19 @@ def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
 def VCFSX  : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vcfsx $vD, $vB, $UIMM", IIC_VecFP,
                       [(set v4f32:$vD,
-                             (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
+                             (int_ppc_altivec_vcfsx v4i32:$vB, timm:$UIMM))]>;
 def VCFUX  : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vcfux $vD, $vB, $UIMM", IIC_VecFP,
                       [(set v4f32:$vD,
-                             (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
+                             (int_ppc_altivec_vcfux v4i32:$vB, timm:$UIMM))]>;
 def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vctsxs $vD, $vB, $UIMM", IIC_VecFP,
                       [(set v4i32:$vD,
-                             (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
+                             (int_ppc_altivec_vctsxs v4f32:$vB, timm:$UIMM))]>;
 def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vctuxs $vD, $vB, $UIMM", IIC_VecFP,
                       [(set v4i32:$vD,
-                             (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
+                             (int_ppc_altivec_vctuxs v4f32:$vB, timm:$UIMM))]>;
 
 // Defines with the UIM field set to 0 for floating-point
 // to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -706,7 +711,7 @@ def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
                       [(set v16i8:$vD,
                         (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
   def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
                          "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
   def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
@@ -789,37 +794,37 @@ class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
 
 // f32 element comparisons.
 def VCMPBFP   : VCMP <966, "vcmpbfp $vD, $vA, $vB"  , v4f32>;
-def VCMPBFPo  : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPBFP_rec  : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
 def VCMPEQFP  : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
-def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPEQFP_rec : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
 def VCMPGEFP  : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
-def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP_rec : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
 def VCMPGTFP  : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
-def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP_rec : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
 
 // i8 element comparisons.
 def VCMPEQUB  : VCMP <  6, "vcmpequb $vD, $vA, $vB" , v16i8>;
-def VCMPEQUBo : VCMPo<  6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPEQUB_rec : VCMPo<  6, "vcmpequb. $vD, $vA, $vB", v16i8>;
 def VCMPGTSB  : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
-def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB_rec : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
 def VCMPGTUB  : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
-def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB_rec : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
 
 // i16 element comparisons.
 def VCMPEQUH  : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
-def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPEQUH_rec : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
 def VCMPGTSH  : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
-def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH_rec : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
 def VCMPGTUH  : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
-def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH_rec : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
 
 // i32 element comparisons.
 def VCMPEQUW  : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
-def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPEQUW_rec : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
 def VCMPGTSW  : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
-def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW_rec : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
 def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
-def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW_rec : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
 
 let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
     isReMaterializable = 1 in {
@@ -856,6 +861,14 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
 def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
 def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
 
+// Rotates.
+def : Pat<(v16i8 (rotl v16i8:$vA, v16i8:$vB)),
+          (v16i8 (VRLB v16i8:$vA, v16i8:$vB))>;
+def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
+          (v8i16 (VRLH v8i16:$vA, v8i16:$vB))>;
+def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
+          (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
+
 // Loads.
 def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -1092,6 +1105,20 @@ def : Pat<(v4f32 (vselect v4i32:$vA, v4f32:$vB, v4f32:$vC)),
 def : Pat<(v2f64 (vselect v2i64:$vA, v2f64:$vB, v2f64:$vC)),
           (VSEL $vC, $vB, $vA)>;
 
+// Vector Integer Average Instructions
+def : Pat<(v4i32 (sra (sub v4i32:$vA, (vnot_ppc v4i32:$vB)),
+                      (v4i32 (immEQOneV)))), (v4i32 (VAVGSW $vA, $vB))>;
+def : Pat<(v8i16 (sra (sub v8i16:$vA, (v8i16 (bitconvert(vnot_ppc v4i32:$vB)))),
+                      (v8i16 (immEQOneV)))), (v8i16 (VAVGSH $vA, $vB))>;
+def : Pat<(v16i8 (sra (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
+                      (v16i8 (immEQOneV)))), (v16i8 (VAVGSB $vA, $vB))>;
+def : Pat<(v4i32 (srl (sub v4i32:$vA, (vnot_ppc v4i32:$vB)),
+                      (v4i32 (immEQOneV)))), (v4i32 (VAVGUW $vA, $vB))>;
+def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot_ppc v4i32:$vB)))),
+                      (v8i16 (immEQOneV)))), (v8i16 (VAVGUH $vA, $vB))>;
+def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
+                      (v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
+
 } // end HasAltivec
 
 def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
@@ -1140,9 +1167,13 @@ def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
 def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
         (VMRGOW $vB, $vA)>;
 
+// Vector rotates.
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+
+def : Pat<(v2i64 (rotl v2i64:$vA, v2i64:$vB)),
+          (v2i64 (VRLD v2i64:$vA, v2i64:$vB))>;
 
 // Vector shifts
-def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
 def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                     "vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
 def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
@@ -1245,11 +1276,11 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
 
 // i64 element comparisons.
 def VCMPEQUD  : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
-def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPEQUD_rec : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
 def VCMPGTSD  : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
-def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD_rec : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
 def VCMPGTUD  : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
-def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD_rec : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
 
 // The cryptography instructions that do not require Category:Vector.Crypto
 def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
@@ -1313,21 +1344,21 @@ let Predicates = [HasP9Altivec] in {
 
 // i8 element comparisons.
 def VCMPNEB   : VCMP <  7, "vcmpneb $vD, $vA, $vB"  , v16i8>;
-def VCMPNEBo  : VCMPo <  7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
+def VCMPNEB_rec  : VCMPo <  7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
 def VCMPNEZB  : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZBo : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEZB_rec : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
 
 // i16 element comparisons.
 def VCMPNEH   : VCMP < 71, "vcmpneh $vD, $vA, $vB"  , v8i16>;
-def VCMPNEHo  : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
+def VCMPNEH_rec  : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
 def VCMPNEZH  : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZHo : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEZH_rec : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
 
 // i32 element comparisons.
 def VCMPNEW   : VCMP <135, "vcmpnew $vD, $vA, $vB"  , v4i32>;
-def VCMPNEWo  : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
+def VCMPNEW_rec  : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
 def VCMPNEZW  : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZWo : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEZW_rec : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
 
 // VX-Form: [PO VRT / UIM VRB XO].
 // We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1347,12 +1378,14 @@ def VEXTRACTUW : VX1_VT5_UIM5_VB5<653, "vextractuw", []>;
 def VEXTRACTD  : VX1_VT5_UIM5_VB5<717, "vextractd" , []>;
 
 // Vector Extract Unsigned Byte/Halfword/Word Left/Right-Indexed
+let hasSideEffects = 0 in {
 def VEXTUBLX : VX1_RT5_RA5_VB5<1549, "vextublx", []>;
 def VEXTUBRX : VX1_RT5_RA5_VB5<1805, "vextubrx", []>;
 def VEXTUHLX : VX1_RT5_RA5_VB5<1613, "vextuhlx", []>;
 def VEXTUHRX : VX1_RT5_RA5_VB5<1869, "vextuhrx", []>;
 def VEXTUWLX : VX1_RT5_RA5_VB5<1677, "vextuwlx", []>;
 def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
+}
 
 // Vector Insert Element Instructions
 def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
@@ -1410,6 +1443,12 @@ let isCodeGenOnly = 1 in {
   def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
 }
 
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i8)), (v4i32 (VEXTSB2W $VRB))>;
+def : Pat<(v4i32 (sext_inreg v4i32:$VRB, v4i16)), (v4i32 (VEXTSH2W $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i8)), (v2i64 (VEXTSB2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i16)), (v2i64 (VEXTSH2D $VRB))>;
+def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i32)), (v2i64 (VEXTSW2D $VRB))>;
+
 // Vector Integer Negate
 def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw",
                            [(set v4i32:$vD,
@@ -1496,18 +1535,18 @@ class VX_VT5_EO5_VB5_XO9_o<bits<5> eo, bits<9> xo, string opc,
 }
 
 // Decimal Convert From/to National/Zoned/Signed-QWord
-def BCDCFNo  : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>;
-def BCDCFZo  : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>;
-def BCDCTNo  : VX_VT5_EO5_VB5_XO9_o    <5, 385, "bcdctn." , []>;
-def BCDCTZo  : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>;
-def BCDCFSQo : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>;
-def BCDCTSQo : VX_VT5_EO5_VB5_XO9_o    <0, 385, "bcdctsq.", []>;
+def BCDCFN_rec  : VX_VT5_EO5_VB5_PS1_XO9_o<7, 385, "bcdcfn." , []>;
+def BCDCFZ_rec  : VX_VT5_EO5_VB5_PS1_XO9_o<6, 385, "bcdcfz." , []>;
+def BCDCTN_rec  : VX_VT5_EO5_VB5_XO9_o    <5, 385, "bcdctn." , []>;
+def BCDCTZ_rec  : VX_VT5_EO5_VB5_PS1_XO9_o<4, 385, "bcdctz." , []>;
+def BCDCFSQ_rec : VX_VT5_EO5_VB5_PS1_XO9_o<2, 385, "bcdcfsq.", []>;
+def BCDCTSQ_rec : VX_VT5_EO5_VB5_XO9_o    <0, 385, "bcdctsq.", []>;
 
 // Decimal Copy-Sign/Set-Sign
 let Defs = [CR6] in
-def BCDCPSGNo : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
+def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
 
-def BCDSETSGNo : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
+def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
 
 // [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
 class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
@@ -1526,13 +1565,13 @@ class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
 }
 
 // Decimal Shift/Unsigned-Shift/Shift-and-Round
-def BCDSo  : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
-def BCDUSo : VX_VT5_VA5_VB5_XO9_o    <129, "bcdus.", []>;
-def BCDSRo : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
+def BCDS_rec  : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
+def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o    <129, "bcdus.", []>;
+def BCDSR_rec : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
 
 // Decimal (Unsigned) Truncate
-def BCDTRUNCo  : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
-def BCDUTRUNCo : VX_VT5_VA5_VB5_XO9_o    <321, "bcdutrunc.", []>;
+def BCDTRUNC_rec  : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
+def BCDUTRUNC_rec : VX_VT5_VA5_VB5_XO9_o    <321, "bcdutrunc.", []>;
 
 // Absolute Difference
 def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 96b9c9a119c0..115bd44ea202 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -262,8 +262,8 @@ class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
   : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
 
-  // Even though ADDICo does not really have an RC bit, provide
-  // the declaration of one here so that isDOT has something to set.
+  // Even though ADDIC_rec does not really have an RC bit, provide
+  // the declaration of one here so that isRecordForm has something to set.
   bit RC = 0;
 }
 
@@ -428,7 +428,7 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RST;
   let Inst{11-15} = A;
@@ -463,7 +463,7 @@ class XForm_base_r3xo_swapped<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
   bits<5> RST;
   bits<5> B;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RST;
   let Inst{11-15} = A;
@@ -744,7 +744,7 @@ class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RST;
   let Inst{11-20} = 0;
@@ -757,7 +757,7 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let Pattern = pattern;
   bits<5> FM;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = FM;
   let Inst{11-20} = 0;
@@ -902,7 +902,7 @@ class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   : I<opcode, OOL, IOL, asmstr, itin> {
   bit L;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{7-9}  = 0;
   let Inst{10}   = L;
@@ -1265,7 +1265,7 @@ class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = XT{4-0};
   let Inst{11-15} = XA{4-0};
@@ -1529,6 +1529,29 @@ class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
   let BH = 0;
 }
 
+class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo,
+                               bits<5> bi, bit lk, bits<6> opcode2, dag OOL,
+                               dag IOL, string asmstr, InstrItinClass itin,
+                               list<dag> pattern>
+  : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+
+  bits<5> RST;
+  bits<21> D_RA;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = bo;
+  let Inst{11-15} = bi;
+  let Inst{16-18} = 0;
+  let Inst{19-20} = 0; // Unused (BH)
+  let Inst{21-30} = xo1;
+  let Inst{31}    = lk;
+
+  let Inst{38-42} = RST;
+  let Inst{43-47} = D_RA{20-16}; // Base Register
+  let Inst{48-63} = D_RA{15-0};  // Displacement
+}
+
 // 1.7.8 XFX-Form
 class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin>
@@ -1628,7 +1651,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   bits<8> FM;
   bits<5> rT;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
   let Pattern = pattern;
 
   let Inst{6} = 0;
@@ -1647,7 +1670,7 @@ class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   bit W;
   bits<5> FRB;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
   let Pattern = pattern;
 
   let Inst{6} = L;
@@ -1666,7 +1689,7 @@ class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
   bits<5> RS;
   bits<6> SH;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
   let Pattern = pattern;
 
   let Inst{6-10}  = RS;
@@ -1687,7 +1710,7 @@ class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RT;
   let Inst{11-15} = RA;
@@ -1714,7 +1737,7 @@ class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = FRT;
   let Inst{11-15} = FRA;
@@ -1774,7 +1797,7 @@ class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RS;
   let Inst{11-15} = RA;
@@ -1800,7 +1823,7 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RS;
   let Inst{11-15} = RA;
@@ -1821,7 +1844,7 @@ class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = RS;
   let Inst{11-15} = RA;
@@ -2083,7 +2106,7 @@ class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = FRT;
   let Inst{11-15} = FRA;
@@ -2107,7 +2130,7 @@ class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = FRT;
  let Inst{11-22} = idx;
@@ -2125,7 +2148,7 @@ class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
 
   let Pattern = pattern;
 
-  bit RC = 0;    // set by isDOT
+  bit RC = 0;    // set by isRecordForm
 
   let Inst{6-10}  = VRT;
   let Inst{11-14} = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 104b57a70a2e..6cbf999ca73d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -36,7 +36,7 @@ def TEND : XForm_htm1 <31, 686,
 
 def TABORT : XForm_base_r3xo <31, 910,
              (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
-             []>, isDOT {
+             []>, isRecordForm {
   let RST = 0;
   let B = 0;
 }
@@ -44,38 +44,38 @@ def TABORT : XForm_base_r3xo <31, 910,
 def TABORTWC : XForm_base_r3xo <31, 782,
                (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
                "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
-               isDOT;
+               isRecordForm;
 
 def TABORTWCI : XForm_base_r3xo <31, 846,
                 (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
                 "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
-                isDOT;
+                isRecordForm;
 
 def TABORTDC : XForm_base_r3xo <31, 814,
                (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
                "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
-               isDOT;
+               isRecordForm;
 
 def TABORTDCI : XForm_base_r3xo <31, 878,
                 (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
                 "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
-                isDOT;
+                isRecordForm;
 
 def TSR : XForm_htm2 <31, 750,
          (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
-          isDOT;
+          isRecordForm;
 
 def TRECLAIM : XForm_base_r3xo <31, 942,
                (outs), (ins gprc:$A), "treclaim. $A", IIC_SprMTSPR,
               []>,
-               isDOT {
+               isRecordForm {
   let RST = 0;
   let B = 0;
 }
 
 def TRECHKPT : XForm_base_r3xo <31, 1006,
                (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
-               isDOT {
+               isRecordForm {
  let RST = 0;
   let A = 0;
   let B = 0;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b10672965c9..30906a32b00c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -108,7 +108,7 @@ ScheduleHazardRecognizer *
 PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                            const ScheduleDAG *DAG) const {
   unsigned Directive =
-      static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
+      static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
   if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
       Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
     const InstrItineraryData *II =
@@ -125,7 +125,7 @@ ScheduleHazardRecognizer *
 PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                                  const ScheduleDAG *DAG) const {
   unsigned Directive =
-      DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
+      DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
 
   // FIXME: Leaving this as-is until we have POWER9 scheduling info
   if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
@@ -202,7 +202,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 
   // On some cores, there is an additional delay between writing to a condition
   // register, and using it from a branch.
-  unsigned Directive = Subtarget.getDarwinDirective();
+  unsigned Directive = Subtarget.getCPUDirective();
   switch (Directive) {
   default: break;
   case PPC::DIR_7400:
@@ -371,7 +371,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   MachineFunction &MF = *MI.getParent()->getParent();
 
   // Normal instructions can be commuted the obvious way.
-  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMIo)
+  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
     return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
   // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
   // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
@@ -391,7 +391,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
 
   // Swap op1/op2
   assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
-         "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
+         "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
   Register Reg0 = MI.getOperand(0).getReg();
   Register Reg1 = MI.getOperand(1).getReg();
   Register Reg2 = MI.getOperand(2).getReg();
@@ -469,7 +469,7 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI) const {
   // This function is used for scheduling, and the nop wanted here is the type
   // that terminates dispatch groups on the POWER cores.
-  unsigned Directive = Subtarget.getDarwinDirective();
+  unsigned Directive = Subtarget.getCPUDirective();
   unsigned Opcode;
   switch (Directive) {
   default:            Opcode = PPC::NOP; break;
@@ -903,15 +903,15 @@ static unsigned getCRBitValue(unsigned CRBit) {
 
 void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
-                               const DebugLoc &DL, unsigned DestReg,
-                               unsigned SrcReg, bool KillSrc) const {
+                               const DebugLoc &DL, MCRegister DestReg,
+                               MCRegister SrcReg, bool KillSrc) const {
   // We can end up with self copies and similar things as a result of VSX copy
   // legalization. Promote them here.
   const TargetRegisterInfo *TRI = &getRegisterInfo();
   if (PPC::F8RCRegClass.contains(DestReg) &&
       PPC::VSRCRegClass.contains(SrcReg)) {
-    unsigned SuperReg =
-        TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+    MCRegister SuperReg =
+        TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
 
     if (VSXSelfCopyCrash && SrcReg == SuperReg)
       llvm_unreachable("nop VSX copy");
@@ -919,8 +919,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     DestReg = SuperReg;
   } else if (PPC::F8RCRegClass.contains(SrcReg) &&
             PPC::VSRCRegClass.contains(DestReg)) {
-    unsigned SuperReg =
-        TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+    MCRegister SuperReg =
+        TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
 
     if (VSXSelfCopyCrash && DestReg == SuperReg)
       llvm_unreachable("nop VSX copy");
@@ -931,7 +931,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Different class register copy
   if (PPC::CRBITRCRegClass.contains(SrcReg) &&
      PPC::GPRCRegClass.contains(DestReg)) {
-    unsigned CRReg = getCRFromCRBit(SrcReg);
+    MCRegister CRReg = getCRFromCRBit(SrcReg);
     BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
     getKillRegState(KillSrc);
 
     // Rotate the CR bit in the CR fields to be the least significant bit and
@@ -1587,22 +1587,6 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
   return Found;
 }
 
-bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const {
-  unsigned OpC = MI.getOpcode();
-  switch (OpC) {
-  default:
-    return false;
-  case PPC::B:
-  case PPC::BLR:
-  case PPC::BLR8:
-  case PPC::BCTR:
-  case PPC::BCTR8:
-  case PPC::BCTRL:
-  case PPC::BCTRL8:
-    return true;
-  }
-}
-
 bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                   unsigned &SrcReg2, int &Mask,
                                   int &Value) const {
@@ -1836,8 +1820,8 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
 
   int NewOpC = -1;
   int MIOpC = MI->getOpcode();
-  if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8 ||
-      MIOpC == PPC::ANDISo || MIOpC == PPC::ANDISo8)
+  if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
+      MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
     NewOpC = MIOpC;
   else {
     NewOpC = PPC::getRecordFormOpcode(MIOpC);
@@ -1943,9 +1927,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
       Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
       // The mask value needs to shift right 16 if we're emitting andis.
       Mask >>= MBInLoHWord ? 0 : 16;
-      NewOpC = MIOpC == PPC::RLWINM ?
-        (MBInLoHWord ? PPC::ANDIo : PPC::ANDISo) :
-        (MBInLoHWord ? PPC::ANDIo8 :PPC::ANDISo8);
+      NewOpC = MIOpC == PPC::RLWINM
+                   ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
+                   : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
     } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
                (ME - MB + 1 == SH) && (MB >= 16)) {
       // If we are rotating by the exact number of bits as are in the mask
      // and the mask is in the least significant bits of the register,
       // that's just an andis. (as long as the GPR result has no uses).
       Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
       Mask >>= 16;
-      NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDISo :PPC::ANDISo8;
+      NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
     }
 
     // If we've set the mask, we can transform.
if (Mask != ~0LLU) { @@ -1966,7 +1950,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, int64_t MB = MI->getOperand(3).getImm(); if (MB >= 48) { uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; - NewOpC = PPC::ANDIo8; + NewOpC = PPC::ANDI8_rec; MI->RemoveOperand(3); MI->getOperand(2).setImm(Mask); NumRcRotatesConvertedToRcAnd++; @@ -2306,7 +2290,7 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, // Replace the instruction. if (LII.SetCR) { - MI.setDesc(get(LII.Is64Bit ? PPC::ANDIo8 : PPC::ANDIo)); + MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); // Set the immediate. MachineInstrBuilder(*MI.getParent()->getParent(), MI) .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine); @@ -2370,15 +2354,13 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( ImmInstrInfo III; unsigned Opc = MI.getOpcode(); bool ConvertibleImmForm = - Opc == PPC::CMPWI || Opc == PPC::CMPLWI || - Opc == PPC::CMPDI || Opc == PPC::CMPLDI || - Opc == PPC::ADDI || Opc == PPC::ADDI8 || - Opc == PPC::ORI || Opc == PPC::ORI8 || - Opc == PPC::XORI || Opc == PPC::XORI8 || - Opc == PPC::RLDICL || Opc == PPC::RLDICLo || - Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || - Opc == PPC::RLWINM || Opc == PPC::RLWINMo || - Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; + Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI || + Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 || + Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI || + Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec || + Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || + Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 || + Opc == PPC::RLWINM8_rec; bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg()) ? isVFRegister(MI.getOperand(0).getReg()) : false; @@ -2527,6 +2509,225 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, "RegNo should be killed or dead"); } +// This opt tries to convert the following imm form to an index form to save +// an add for stack variables. +// Return false if no such pattern is found. +// +// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi +// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg +// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed) +// +// can be converted to: +// +// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm) +// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed) +// +// In order to eliminate the ADD instr, make sure that: +// 1: (OffsetAddi + OffsetImm) must fit in int16, since this offset will be +// used in the new ADDI instr and ADDI can only take an int16 Imm. +// 2: ToBeChangedReg must be killed in the ADD instr, with no other use between +// the ADDI and ADD instrs, since its original def in the ADDI will be changed +// in the new ADDI instr. There must also be no new def of it between the ADD +// and Imm instrs, as ToBeChangedReg will be used in the Index instr. +// 3: ToBeDeletedReg must be killed in the Imm instr, with no other use between +// the ADD and Imm instrs, since the ADD instr will be eliminated. +// 4: ScaleReg must not be redefined between the ADD and Imm instrs, since it +// will be moved to the Index instr. +// (A worked example is sketched just after this hunk.) +bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const { + MachineFunction *MF = MI.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + bool PostRA = !MRI->isSSA(); + // Do this opt after PEI, which runs after RA.
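To make the transformation described above concrete, here is an illustrative before/after in PPC assembly (hypothetical registers and offsets, not taken from this patch):

// Before:                               After:
//   addi 5, 1, 100  ; r5 = r1 + 100       addi 5, 1, 132  ; 100 + 32 folded in
//   add  4, 5, 6    ; r4 = r5 + r6        lwzx 3, 6, 5    ; indexed (X-form) load
//   lwz  3, 32(4)   ; r3 = load 32(r4)
// The fold is only legal while the combined displacement still fits the
// signed 16-bit addi immediate field, i.e. isInt<16>(100 + 32) here.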
The reason is that stack slot expansion + // in PEI may expose such opportunities, since it is in PEI that stack slot + // offsets from the frame base (OffsetAddi) are determined. + if (!PostRA) + return false; + unsigned ToBeDeletedReg = 0; + int64_t OffsetImm = 0; + unsigned XFormOpcode = 0; + ImmInstrInfo III; + + // Check if the Imm instr meets the requirements. + if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm, + III)) + return false; + + bool OtherIntermediateUse = false; + MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse); + + // Exit if there is another use between the ADD and Imm instrs, or no def is found. + if (OtherIntermediateUse || !ADDMI) + return false; + + // Check if the ADD instr meets the requirements. + if (!isADDInstrEligibleForFolding(*ADDMI)) + return false; + + unsigned ScaleRegIdx = 0; + int64_t OffsetAddi = 0; + MachineInstr *ADDIMI = nullptr; + + // Check if there is a valid ToBeChangedReg in ADDMI. + // 1: It must be killed. + // 2: Its definition must be a valid ADDIMI. + // 3: It must satisfy the int16 offset requirement. + if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm)) + ScaleRegIdx = 2; + else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm)) + ScaleRegIdx = 1; + else + return false; + + assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg."); + unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg(); + unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg(); + auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start, + MachineBasicBlock::iterator End) { + for (auto It = ++Start; It != End; It++) + if (It->modifiesRegister(Reg, &getRegisterInfo())) + return true; + return false; + }; + // Make sure there is no other def of ToBeChangedReg or ScaleReg between the + // ADD instr and the Imm instr. + if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI)) + return false; + + // Now perform the transformation. + LLVM_DEBUG(dbgs() << "Replace instruction: " + << "\n"); + LLVM_DEBUG(ADDIMI->dump()); + LLVM_DEBUG(ADDMI->dump()); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "with: " + << "\n"); + + // Update the ADDI instr. + ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm); + + // Update the Imm instr. + MI.setDesc(get(XFormOpcode)); + MI.getOperand(III.ImmOpNo) + .ChangeToRegister(ScaleReg, false, false, + ADDMI->getOperand(ScaleRegIdx).isKill()); + + MI.getOperand(III.OpNoForForwarding) + .ChangeToRegister(ToBeChangedReg, false, false, true); + + // Eliminate the ADD instr. + ADDMI->eraseFromParent(); + + LLVM_DEBUG(ADDIMI->dump()); + LLVM_DEBUG(MI.dump()); + + return true; +} + +bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, + int64_t &Imm) const { + unsigned Opc = ADDIMI.getOpcode(); + + // Exit if the instruction is not ADDI. + if (Opc != PPC::ADDI && Opc != PPC::ADDI8) + return false; + + Imm = ADDIMI.getOperand(2).getImm(); + + return true; +} + +bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const { + unsigned Opc = ADDMI.getOpcode(); + + // Exit if the instruction is not ADD. + return Opc == PPC::ADD4 || Opc == PPC::ADD8; +} + +bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI, + unsigned &ToBeDeletedReg, + unsigned &XFormOpcode, + int64_t &OffsetImm, + ImmInstrInfo &III) const { + // Only handle load/store. + if (!MI.mayLoadOrStore()) + return false; + + unsigned Opc = MI.getOpcode(); + + XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc); + + // Exit if the instruction has no index form.
+ if (XFormOpcode == PPC::INSTRUCTION_LIST_END) + return false; + + // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap. + if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()), + III, true)) + return false; + + if (!III.IsSummingOperands) + return false; + + MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo); + MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding); + // Only support imm operands, not relocation slots or others. + if (!ImmOperand.isImm()) + return false; + + assert(RegOperand.isReg() && "Instruction format is not right"); + + // If there are other uses of ToBeDeletedReg after the Imm instr, we cannot delete it. + if (!RegOperand.isKill()) + return false; + + ToBeDeletedReg = RegOperand.getReg(); + OffsetImm = ImmOperand.getImm(); + + return true; +} + +bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, + MachineInstr *&ADDIMI, + int64_t &OffsetAddi, + int64_t OffsetImm) const { + assert((Index == 1 || Index == 2) && "Invalid operand index for add."); + MachineOperand &MO = ADDMI->getOperand(Index); + + if (!MO.isKill()) + return false; + + bool OtherIntermediateUse = false; + + ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse); + // Currently we handle only the single "add + Imm instr" pair case; exit if + // another intermediate use of ToBeChangedReg is found. + // TODO: handle cases where there are other "add + Imm instr" pairs + // with the same offset in the Imm instr, such as: + // + // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi + // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1 + // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed) + // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2 + // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed) + // + // can be converted to: + // + // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, + // (OffsetAddi + OffsetImm) + // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg + // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed) + if (OtherIntermediateUse || !ADDIMI) + return false; + // Check if the ADDI instr meets the requirements. + if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi)) + return false; + + if (isInt<16>(OffsetAddi + OffsetImm)) + return true; + return false; +} + // If this instruction has an immediate form and one of its operands is a // result of a load-immediate or an add-immediate, convert it to // the immediate form if the constant is in range. @@ -2660,34 +2861,34 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } case PPC::RLDICL: - case PPC::RLDICLo: + case PPC::RLDICL_rec: case PPC::RLDICL_32: case PPC::RLDICL_32_64: { // Use APInt's rotate function. int64_t SH = MI.getOperand(2).getImm(); int64_t MB = MI.getOperand(3).getImm(); - APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ? - 64 : 32, SExtImm, true); + APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32, + SExtImm, true); InVal = InVal.rotl(SH); uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; InVal &= Mask; // Can't replace negative values with an LI as that will sign-extend // and not clear the left bits. If we're setting the CR bit, we will use - // ANDIo which won't sign extend, so that's safe. + // ANDI_rec which won't sign extend, so that's safe.
if (isUInt<15>(InVal.getSExtValue()) || - (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) { + (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) { ReplaceWithLI = true; Is64BitLI = Opc != PPC::RLDICL_32; NewImm = InVal.getSExtValue(); - SetCR = Opc == PPC::RLDICLo; + SetCR = Opc == PPC::RLDICL_rec; break; } return false; } case PPC::RLWINM: case PPC::RLWINM8: - case PPC::RLWINMo: - case PPC::RLWINM8o: { + case PPC::RLWINM_rec: + case PPC::RLWINM8_rec: { int64_t SH = MI.getOperand(2).getImm(); int64_t MB = MI.getOperand(3).getImm(); int64_t ME = MI.getOperand(4).getImm(); @@ -2698,15 +2899,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, InVal &= Mask; // Can't replace negative values with an LI as that will sign-extend // and not clear the left bits. If we're setting the CR bit, we will use - // ANDIo which won't sign extend, so that's safe. + // ANDI_rec which won't sign extend, so that's safe. bool ValueFits = isUInt<15>(InVal.getSExtValue()); - ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) && + ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) && isUInt<16>(InVal.getSExtValue())); if (ValueFits) { ReplaceWithLI = true; - Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; + Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec; NewImm = InVal.getSExtValue(); - SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o; + SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec; break; } return false; @@ -2768,7 +2969,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, LII.Is64Bit = Is64BitLI; LII.SetCR = SetCR; // If we're setting the CR, the original load-immediate must be kept (as an - // operand to ANDIo/ANDI8o). + // operand to ANDI_rec/ANDI8_rec). if (KilledDef && SetCR) *KilledDef = nullptr; replaceInstrWithLI(MI, LII); @@ -2819,13 +3020,13 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8; break; - case PPC::ADDCo: + case PPC::ADDC_rec: III.SignedImm = true; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; III.IsSummingOperands = true; - III.ImmOpcode = PPC::ADDICo; + III.ImmOpcode = PPC::ADDIC_rec; break; case PPC::SUBFC: case PPC::SUBFC8: @@ -2851,8 +3052,8 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, III.IsCommutative = false; III.ImmOpcode = Opc == PPC::CMPLW ? 
PPC::CMPLWI : PPC::CMPLDI; break; - case PPC::ANDo: - case PPC::AND8o: + case PPC::AND_rec: + case PPC::AND8_rec: case PPC::OR: case PPC::OR8: case PPC::XOR: @@ -2863,8 +3064,12 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, III.IsCommutative = true; switch(Opc) { default: llvm_unreachable("Unknown opcode"); - case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break; - case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break; + case PPC::AND_rec: + III.ImmOpcode = PPC::ANDI_rec; + break; + case PPC::AND8_rec: + III.ImmOpcode = PPC::ANDI8_rec; + break; case PPC::OR: III.ImmOpcode = PPC::ORI; break; case PPC::OR8: III.ImmOpcode = PPC::ORI8; break; case PPC::XOR: III.ImmOpcode = PPC::XORI; break; @@ -2873,18 +3078,18 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, break; case PPC::RLWNM: case PPC::RLWNM8: - case PPC::RLWNMo: - case PPC::RLWNM8o: + case PPC::RLWNM_rec: + case PPC::RLWNM8_rec: case PPC::SLW: case PPC::SLW8: - case PPC::SLWo: - case PPC::SLW8o: + case PPC::SLW_rec: + case PPC::SLW8_rec: case PPC::SRW: case PPC::SRW8: - case PPC::SRWo: - case PPC::SRW8o: + case PPC::SRW_rec: + case PPC::SRW8_rec: case PPC::SRAW: - case PPC::SRAWo: + case PPC::SRAW_rec: III.SignedImm = false; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; @@ -2894,8 +3099,8 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, // This does not apply to shift right algebraic because a value // out of range will produce a -1/0. III.ImmWidth = 16; - if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || - Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o) + if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec || + Opc == PPC::RLWNM8_rec) III.TruncateImmTo = 5; else III.TruncateImmTo = 6; @@ -2903,38 +3108,50 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, default: llvm_unreachable("Unknown opcode"); case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break; case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break; - case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break; - case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break; + case PPC::RLWNM_rec: + III.ImmOpcode = PPC::RLWINM_rec; + break; + case PPC::RLWNM8_rec: + III.ImmOpcode = PPC::RLWINM8_rec; + break; case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break; case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break; - case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break; - case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break; + case PPC::SLW_rec: + III.ImmOpcode = PPC::RLWINM_rec; + break; + case PPC::SLW8_rec: + III.ImmOpcode = PPC::RLWINM8_rec; + break; case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break; case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break; - case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break; - case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break; + case PPC::SRW_rec: + III.ImmOpcode = PPC::RLWINM_rec; + break; + case PPC::SRW8_rec: + III.ImmOpcode = PPC::RLWINM8_rec; + break; case PPC::SRAW: III.ImmWidth = 5; III.TruncateImmTo = 0; III.ImmOpcode = PPC::SRAWI; break; - case PPC::SRAWo: + case PPC::SRAW_rec: III.ImmWidth = 5; III.TruncateImmTo = 0; - III.ImmOpcode = PPC::SRAWIo; + III.ImmOpcode = PPC::SRAWI_rec; break; } break; case PPC::RLDCL: - case PPC::RLDCLo: + case PPC::RLDCL_rec: case PPC::RLDCR: - case PPC::RLDCRo: + case PPC::RLDCR_rec: case PPC::SLD: - case PPC::SLDo: + case PPC::SLD_rec: case PPC::SRD: - case PPC::SRDo: + case PPC::SRD_rec: case PPC::SRAD: - case PPC::SRADo: + case PPC::SRAD_rec: III.SignedImm = false; III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; @@ -2944,30 
+3161,38 @@ bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, // This does not apply to shift right algebraic because a value // out of range will produce a -1/0. III.ImmWidth = 16; - if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo || - Opc == PPC::RLDCR || Opc == PPC::RLDCRo) + if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR || + Opc == PPC::RLDCR_rec) III.TruncateImmTo = 6; else III.TruncateImmTo = 7; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break; - case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break; + case PPC::RLDCL_rec: + III.ImmOpcode = PPC::RLDICL_rec; + break; case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break; - case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break; + case PPC::RLDCR_rec: + III.ImmOpcode = PPC::RLDICR_rec; + break; case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break; - case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break; + case PPC::SLD_rec: + III.ImmOpcode = PPC::RLDICR_rec; + break; case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break; - case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break; + case PPC::SRD_rec: + III.ImmOpcode = PPC::RLDICL_rec; + break; case PPC::SRAD: III.ImmWidth = 6; III.TruncateImmTo = 0; III.ImmOpcode = PPC::SRADI; break; - case PPC::SRADo: + case PPC::SRAD_rec: III.ImmWidth = 6; III.TruncateImmTo = 0; - III.ImmOpcode = PPC::SRADIo; + III.ImmOpcode = PPC::SRADI_rec; break; } break; @@ -3538,14 +3763,16 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); unsigned Opc = MI.getOpcode(); - bool SpecialShift32 = - Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo; - bool SpecialShift64 = - Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo; - bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo || - Opc == PPC::SLDo || Opc == PPC::SRDo; - bool RightShift = - Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo; + bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec || + Opc == PPC::SRW || Opc == PPC::SRW_rec || + Opc == PPC::SLW8 || Opc == PPC::SLW8_rec || + Opc == PPC::SRW8 || Opc == PPC::SRW8_rec; + bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec || + Opc == PPC::SRD || Opc == PPC::SRD_rec; + bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec || + Opc == PPC::SLD_rec || Opc == PPC::SRD_rec; + bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD || + Opc == PPC::SRD_rec; MI.setDesc(get(III.ImmOpcode)); if (ConstantOpNo == III.OpNoForForwarding) { @@ -3649,27 +3876,21 @@ int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { // i.e. 0 to 31-th bits are same as 32-th bit. 
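Since the comment above uses IBM MSB-0 bit numbering, a restatement in LSB-0 terms may help (a hedged sketch, not from this patch; the helper name is invented): the property holds when bits 63..32 of the 64-bit value are all copies of bit 31.

#include <cstdint>
// True exactly when V is the sign extension of its own low 32 bits.
static bool isSignExtended64(int64_t V) {
  return V == static_cast<int32_t>(V);
}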
static bool isSignExtendingOp(const MachineInstr &MI) { int Opcode = MI.getOpcode(); - if (Opcode == PPC::LI || Opcode == PPC::LI8 || - Opcode == PPC::LIS || Opcode == PPC::LIS8 || - Opcode == PPC::SRAW || Opcode == PPC::SRAWo || - Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo || - Opcode == PPC::LWA || Opcode == PPC::LWAX || - Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || - Opcode == PPC::LHA || Opcode == PPC::LHAX || - Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 || - Opcode == PPC::LBZ || Opcode == PPC::LBZX || - Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || - Opcode == PPC::LBZU || Opcode == PPC::LBZUX || - Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || - Opcode == PPC::LHZ || Opcode == PPC::LHZX || - Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || - Opcode == PPC::LHZU || Opcode == PPC::LHZUX || - Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || - Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo || - Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo || - Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 || - Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo || - Opcode == PPC::SETB || Opcode == PPC::SETB8 || + if (Opcode == PPC::LI || Opcode == PPC::LI8 || Opcode == PPC::LIS || + Opcode == PPC::LIS8 || Opcode == PPC::SRAW || Opcode == PPC::SRAW_rec || + Opcode == PPC::SRAWI || Opcode == PPC::SRAWI_rec || Opcode == PPC::LWA || + Opcode == PPC::LWAX || Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 || + Opcode == PPC::LHA || Opcode == PPC::LHAX || Opcode == PPC::LHA8 || + Opcode == PPC::LHAX8 || Opcode == PPC::LBZ || Opcode == PPC::LBZX || + Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || Opcode == PPC::LBZU || + Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + Opcode == PPC::LHZ || Opcode == PPC::LHZX || Opcode == PPC::LHZ8 || + Opcode == PPC::LHZX8 || Opcode == PPC::LHZU || Opcode == PPC::LHZUX || + Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || Opcode == PPC::EXTSB || + Opcode == PPC::EXTSB_rec || Opcode == PPC::EXTSH || + Opcode == PPC::EXTSH_rec || Opcode == PPC::EXTSB8 || + Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSW || + Opcode == PPC::EXTSW_rec || Opcode == PPC::SETB || Opcode == PPC::SETB8 || Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 || Opcode == PPC::EXTSB8_32_64) return true; @@ -3677,8 +3898,8 @@ static bool isSignExtendingOp(const MachineInstr &MI) { if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33) return true; - if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || - Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) && + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) && MI.getOperand(3).getImm() > 0 && MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) return true; @@ -3701,52 +3922,46 @@ static bool isZeroExtendingOp(const MachineInstr &MI) { // We have some variations of rotate-and-mask instructions // that clear higher 32-bits. 
- if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || - Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo || + if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec || Opcode == PPC::RLDICL_32_64) && MI.getOperand(3).getImm() >= 32) return true; - if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && + if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) && MI.getOperand(3).getImm() >= 32 && MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm()) return true; - if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || - Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec || Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && MI.getOperand(3).getImm() <= MI.getOperand(4).getImm()) return true; // There are other instructions that clear higher 32-bits. - if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || - Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec || Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 || - Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || - Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo || - Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || - Opcode == PPC::SLW || Opcode == PPC::SLWo || - Opcode == PPC::SRW || Opcode == PPC::SRWo || - Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || - Opcode == PPC::SLWI || Opcode == PPC::SLWIo || - Opcode == PPC::SRWI || Opcode == PPC::SRWIo || - Opcode == PPC::LWZ || Opcode == PPC::LWZX || - Opcode == PPC::LWZU || Opcode == PPC::LWZUX || - Opcode == PPC::LWBRX || Opcode == PPC::LHBRX || - Opcode == PPC::LHZ || Opcode == PPC::LHZX || - Opcode == PPC::LHZU || Opcode == PPC::LHZUX || - Opcode == PPC::LBZ || Opcode == PPC::LBZX || - Opcode == PPC::LBZU || Opcode == PPC::LBZUX || - Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || - Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 || - Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 || - Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || - Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 || - Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || - Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || - Opcode == PPC::ANDIo || Opcode == PPC::ANDISo || - Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo || - Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo || + Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec || + Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW || Opcode == PPC::SLW || + Opcode == PPC::SLW_rec || Opcode == PPC::SRW || Opcode == PPC::SRW_rec || + Opcode == PPC::SLW8 || Opcode == PPC::SRW8 || Opcode == PPC::SLWI || + Opcode == PPC::SLWI_rec || Opcode == PPC::SRWI || + Opcode == PPC::SRWI_rec || Opcode == PPC::LWZ || Opcode == PPC::LWZX || + Opcode == PPC::LWZU || Opcode == PPC::LWZUX || Opcode == PPC::LWBRX || + Opcode == PPC::LHBRX || Opcode == PPC::LHZ || Opcode == PPC::LHZX || + Opcode == PPC::LHZU || Opcode == PPC::LHZUX || Opcode == PPC::LBZ || + Opcode == PPC::LBZX || Opcode == PPC::LBZU || Opcode == PPC::LBZUX || + Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 || Opcode == PPC::LWZU8 || + Opcode == PPC::LWZUX8 || Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 || + Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 || Opcode == PPC::LHZU8 || + Opcode == PPC::LHZUX8 || Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 || + Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 || + 
Opcode == PPC::ANDI_rec || Opcode == PPC::ANDIS_rec || + Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWI_rec || + Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWI_rec || Opcode == PPC::MFVSRWZ) return true; @@ -3840,14 +4055,14 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, return false; } - case PPC::ANDIo: - case PPC::ANDISo: + case PPC::ANDI_rec: + case PPC::ANDIS_rec: case PPC::ORI: case PPC::ORIS: case PPC::XORI: case PPC::XORIS: - case PPC::ANDIo8: - case PPC::ANDISo8: + case PPC::ANDI8_rec: + case PPC::ANDIS8_rec: case PPC::ORI8: case PPC::ORIS8: case PPC::XORI8: @@ -4042,12 +4257,10 @@ MachineInstr *PPCInstrInfo::findLoopInstr( // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. bool PPCInstrInfo::getMemOperandWithOffsetWidth( - const MachineInstr &LdSt, - const MachineOperand *&BaseReg, - int64_t &Offset, - unsigned &Width, - const TargetRegisterInfo *TRI) const { - assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); + const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, + unsigned &Width, const TargetRegisterInfo *TRI) const { + if (!LdSt.mayLoadOrStore()) + return false; // Handle only loads/stores with base register followed by immediate offset. if (LdSt.getNumExplicitOperands() != 3) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 19ab30cb0908..2fe8df0e1d68 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -280,7 +280,7 @@ public: unsigned FalseReg) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, @@ -346,8 +346,6 @@ public: bool DefinesPredicate(MachineInstr &MI, std::vector &Pred) const override; - bool isPredicable(const MachineInstr &MI) const override; - // Comparison optimization. bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, @@ -422,6 +420,16 @@ public: bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; + bool foldFrameOffset(MachineInstr &MI) const; + bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; + bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; + bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, + unsigned &XFormOpcode, + int64_t &OffsetOfImmInstr, + ImmInstrInfo &III) const; + bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, + MachineInstr *&ADDIMI, int64_t &OffsetAddi, + int64_t OffsetImm) const; /// Fixup killed/dead flag for register \p RegNo between instructions [\p /// StartMI, \p EndMI]. 
Some PostRA transformations may violate register diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 24183277519b..b38ca3af63f5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -61,10 +61,6 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; -def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, - SDTCisVec<1> -]>; - def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; @@ -117,6 +113,10 @@ def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2> ]>; +def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -165,7 +165,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; - +def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>; +def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>; def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, @@ -199,7 +200,6 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; -def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; @@ -486,7 +486,7 @@ def mul_without_simm16 : BinOpWithoutSImm16Operand; // PowerPC Flag Definitions. class isPPC64 { bit PPC64 = 1; } -class isDOT { bit RC = 1; } +class isRecordForm { bit RC = 1; } class RegConstraint { string Constraints = C; @@ -961,9 +961,9 @@ multiclass XForm_6r opcode, bits<10> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : XForm_6, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -976,9 +976,9 @@ multiclass XForm_6rc opcode, bits<10> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CARRY, CR0] in - def o : XForm_6, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -991,9 +991,9 @@ multiclass XForm_10rc opcode, bits<10> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CARRY, CR0] in - def o : XForm_10, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1005,9 +1005,9 @@ multiclass XForm_11r opcode, bits<10> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : XForm_11, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1019,9 +1019,35 @@ multiclass XOForm_1r opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : XOForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; + } +} + +// Multiclass for instructions which have a record overflow form as well +// as a record form but no carry (i.e. 
mulld, mulldo, subf, subfo, etc.) +multiclass XOForm_1rx opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1, RecFormRel; + let Defs = [CR0] in + def _rec : XOForm_1, isRecordForm, RecFormRel; + } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [XER] in + def O : XOForm_1, RecFormRel; + let Defs = [XER, CR0] in + def O_rec : XOForm_1, isRecordForm, RecFormRel; } } @@ -1035,11 +1061,21 @@ multiclass XOForm_1rcr opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : XOForm_1, isDOT, RecFormRel, PPC970_DGroup_First, + []>, isRecordForm, RecFormRel, PPC970_DGroup_First, PPC970_DGroup_Cracked; } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [XER] in + def O : XOForm_1, RecFormRel; + let Defs = [XER, CR0] in + def O_rec : XOForm_1, isRecordForm, RecFormRel; + } } multiclass XOForm_1rc opcode, bits<9> xo, bit oe, dag OOL, dag IOL, @@ -1051,9 +1087,19 @@ multiclass XOForm_1rc opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CARRY, CR0] in - def o : XOForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; + } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [CARRY, XER] in + def O : XOForm_1, RecFormRel; + let Defs = [CARRY, XER, CR0] in + def O_rec : XOForm_1, isRecordForm, RecFormRel; } } @@ -1065,9 +1111,19 @@ multiclass XOForm_3r opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : XOForm_3, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; + } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [XER] in + def O : XOForm_3, RecFormRel; + let Defs = [XER, CR0] in + def O_rec : XOForm_3, isRecordForm, RecFormRel; } } @@ -1080,9 +1136,19 @@ multiclass XOForm_3rc opcode, bits<9> xo, bit oe, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CARRY, CR0] in - def o : XOForm_3, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; + } + let BaseName = !strconcat(asmbase, "O") in { + let Defs = [CARRY, XER] in + def O : XOForm_3, RecFormRel; + let Defs = [CARRY, XER, CR0] in + def O_rec : XOForm_3, isRecordForm, RecFormRel; } } @@ -1094,9 +1160,9 @@ multiclass MForm_2r opcode, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : MForm_2, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1108,9 +1174,9 @@ multiclass MDForm_1r opcode, bits<3> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : MDForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1122,9 +1188,9 @@ multiclass MDSForm_1r opcode, bits<4> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : MDSForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1137,9 +1203,9 @@ multiclass XSForm_1rc opcode, bits<9> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CARRY, CR0] in - def o : XSForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1151,9 +1217,9 @@ multiclass XSForm_1r opcode, bits<9> xo, dag OOL, dag IOL, !strconcat(asmbase, 
!strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR0] in - def o : XSForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1165,9 +1231,9 @@ multiclass XForm_26r opcode, bits<10> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR1] in - def o : XForm_26, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1179,9 +1245,9 @@ multiclass XForm_28r opcode, bits<10> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR1] in - def o : XForm_28, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1193,9 +1259,9 @@ multiclass AForm_1r opcode, bits<5> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR1] in - def o : AForm_1, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1207,9 +1273,9 @@ multiclass AForm_2r opcode, bits<5> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR1] in - def o : AForm_2, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1221,9 +1287,9 @@ multiclass AForm_3r opcode, bits<5> xo, dag OOL, dag IOL, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, pattern>, RecFormRel; let Defs = [CR1] in - def o : AForm_3, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } } @@ -1324,12 +1390,13 @@ def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F), } let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isReturn = 1, Uses = [LR, RM] in + let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>, Requires<[In32BitMode]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { - def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, - []>; + let isPredicable = 1 in + def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>; let isCodeGenOnly = 1 in { def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), @@ -1362,9 +1429,10 @@ let Defs = [LR] in let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { - def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), - "b $dst", IIC_BrB, - [(br bb:$dst)]>; + let isPredicable = 1 in + def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), + "b $dst", IIC_BrB, + [(br bb:$dst)]>; def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), "ba $dst", IIC_BrB, []>; } @@ -1485,9 +1553,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { } } let Uses = [CTR, RM] in { - def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", IIC_BrB, [(PPCbctrl)]>, - Requires<[In32BitMode]>; + let isPredicable = 1 in + def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", IIC_BrB, [(PPCbctrl)]>, + Requires<[In32BitMode]>; let isCodeGenOnly = 1 in { def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), @@ -1574,6 +1643,15 @@ def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), "#TC_RETURNr $dst $offset", []>; +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in { + def BCTRL_LWZinto_toc: + XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs), + (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB, + [(PPCbctrl_load_toc iaddr:$src)]>, Requires<[In32BitMode]>; + +} + let isCodeGenOnly = 1 in { @@ -1839,15 +1917,15 @@ def LWARX : XForm_1_memOp<31, 
20, (outs gprc:$rD), (ins memrr:$src), // Instructions to support lock versions of atomics // (EH=1 - see Power ISA 2.07 Book II 4.4.2) def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src), - "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm, Requires<[HasPartwordAtomics]>; def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src), - "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, + "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm, Requires<[HasPartwordAtomics]>; def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src), - "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT; + "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm; // The atomic instructions use the destination register as well as the next one // or two registers in order (modulo 31). @@ -1860,14 +1938,14 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC), let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$rS, memrr:$dst), "stbcx. $rS, $dst", IIC_LdStSTWCX, []>, - isDOT, Requires<[HasPartwordAtomics]>; + isRecordForm, Requires<[HasPartwordAtomics]>; def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$rS, memrr:$dst), "sthcx. $rS, $dst", IIC_LdStSTWCX, []>, - isDOT, Requires<[HasPartwordAtomics]>; + isRecordForm, Requires<[HasPartwordAtomics]>; def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$rS, memrr:$dst), - "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT; + "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isRecordForm; } let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in @@ -2034,6 +2112,7 @@ def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src), } // Load Multiple +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), "lmw $rD, $src", IIC_LdStLMW, []>; @@ -2188,6 +2267,7 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), } // Store Multiple +let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), "stmw $rS, $dst", IIC_LdStLMW, []>; @@ -2221,9 +2301,9 @@ def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in -def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), +def ADDIC_rec : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IIC_IntGeneral, - []>, isDOT, RecFormRel; + []>, isRecordForm, RecFormRel; } def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), "addis $rD, $rA, $imm", IIC_IntSimple, @@ -2253,14 +2333,14 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CR0] in { -def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), +def ANDI_rec : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, - isDOT; -def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), + isRecordForm; +def ANDIS_rec : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andis. 
$dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, - isDOT; + isRecordForm; } def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IIC_IntSimple, @@ -2621,6 +2701,7 @@ def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } +let hasSideEffects = 0 in { let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), "mtlr $rS", IIC_SprMTSPR>, @@ -2631,6 +2712,7 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +} let isCodeGenOnly = 1 in { // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed @@ -2732,8 +2814,8 @@ let Uses = [RM] in { PPC970_DGroup_Single, PPC970_Unit_FPU; let Defs = [CR1] in - def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins), - "mffs. $rT", IIC_IntMFFS, []>, isDOT; + def MFFS_rec : XForm_42<63, 583, (outs f8rc:$rT), (ins), + "mffs. $rT", IIC_IntMFFS, []>, isRecordForm; def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$rT), (ins), "mffsce $rT", IIC_IntMFFS, []>, @@ -2778,9 +2860,9 @@ def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit let isCommutable = 1 in -defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "add", "$rT, $rA, $rB", IIC_IntSimple, - [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +defm ADD4 : XOForm_1rx<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "add", "$rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; let isCodeGenOnly = 1 in def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), "add $rT, $rA, $rB", IIC_IntSimple, @@ -2797,24 +2879,14 @@ defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divwu", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; -def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwe $rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, - Requires<[HasExtDiv]>; -let Defs = [CR0] in -def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwe. $rT, $rA, $rB", IIC_IntDivW, - []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, - Requires<[HasExtDiv]>; -def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divweu $rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, - Requires<[HasExtDiv]>; -let Defs = [CR0] in -def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divweu. 
$rT, $rA, $rB", IIC_IntDivW, - []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, - Requires<[HasExtDiv]>; +defm DIVWE : XOForm_1rcr<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +defm DIVWEU : XOForm_1rcr<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, @@ -2822,13 +2894,13 @@ defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; -defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mullw", "$rT, $rA, $rB", IIC_IntMulHW, - [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +defm MULLW : XOForm_1rx<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mullw", "$rT, $rA, $rB", IIC_IntMulHW, + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; } // isCommutable -defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subf", "$rT, $rA, $rB", IIC_IntGeneral, - [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; +defm SUBF : XOForm_1rx<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subf", "$rT, $rA, $rB", IIC_IntGeneral, + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, @@ -2984,10 +3056,10 @@ def RLWINM : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, RecFormRel; let Defs = [CR0] in -def RLWINMo : MForm_2<21, +def RLWINM_rec : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, - []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; + []>, isRecordForm, RecFormRel, PPC970_DGroup_Cracked; } defm RLWNM : MForm_2r<23, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), @@ -3177,6 +3249,11 @@ def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentr // the function label. def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; +// Pseudo-instruction marked for deletion. When deleting the instruction would +// cause iterator invalidation in MIR transformation passes, this pseudo can be +// used instead. It will be removed unconditionally at pre-emit time (prior to +// branch selection). +def UNENCODED_NOP: PPCEmitTimePseudo<(outs), (ins), "#UNENCODED_NOP", []>; // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift @@ -3219,10 +3296,10 @@ def : Pat<(f64 (fpextend f32:$src)), // source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html // The rule for seq_cst is duplicated to work with both 64 bits and 32 bits // versions of Power. 
-def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>; -def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>; -def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>; -def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>; +def : Pat<(atomic_fence (i64 7), (timm)), (SYNC 0)>, Requires<[HasSYNC]>; +def : Pat<(atomic_fence (i32 7), (timm)), (SYNC 0)>, Requires<[HasSYNC]>; +def : Pat<(atomic_fence (timm), (timm)), (SYNC 1)>, Requires<[HasSYNC]>; +def : Pat<(atomic_fence (timm), (timm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>; let Predicates = [HasFPU] in { // Additional FNMSUB patterns: -a*c + b == -(a*c - b) @@ -4010,24 +4087,24 @@ def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; -def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), - "#ANDIo_1_EQ_BIT", +def ANDI_rec_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDI_rec_1_EQ_BIT", [(set i1:$dst, (trunc (not i32:$in)))]>; -def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), - "#ANDIo_1_GT_BIT", +def ANDI_rec_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), + "#ANDI_rec_1_GT_BIT", [(set i1:$dst, (trunc i32:$in))]>; -def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), - "#ANDIo_1_EQ_BIT8", +def ANDI_rec_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDI_rec_1_EQ_BIT8", [(set i1:$dst, (trunc (not i64:$in)))]>; -def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), - "#ANDIo_1_GT_BIT8", +def ANDI_rec_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), + "#ANDI_rec_1_GT_BIT8", [(set i1:$dst, (trunc i64:$in))]>; def : Pat<(i1 (not (trunc i32:$in))), - (ANDIo_1_EQ_BIT $in)>; + (ANDI_rec_1_EQ_BIT $in)>; def : Pat<(i1 (not (trunc i64:$in))), - (ANDIo_1_EQ_BIT8 $in)>; + (ANDI_rec_1_EQ_BIT8 $in)>; //===----------------------------------------------------------------------===// // PowerPC Instructions used for assembler/disassembler only @@ -4111,22 +4188,22 @@ def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA), def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), "mtfsfi $BF, $U, $W", IIC_IntMFFS>; -def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), - "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT; +def MTFSFI_rec : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), + "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isRecordForm; def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>; -def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>; +def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>; let Predicates = [HasFPU] in { def MTFSF : XFLForm_1<63, 711, (outs), (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>; -def MTFSFo : XFLForm_1<63, 711, (outs), +def MTFSF_rec : XFLForm_1<63, 711, (outs), (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), - "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT; + "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isRecordForm; def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>; -def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>; +def : InstAlias<"mtfsf. 
$FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>; } def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), @@ -4144,8 +4221,8 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB), def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>; let Defs = [CR0] in -def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB), - "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT; +def SLBFEE_rec : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB), + "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isRecordForm; def TLBIA : XForm_0<31, 370, (outs), (ins), "tlbia", IIC_SprTLBIA, []>; @@ -4188,7 +4265,7 @@ def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), def TLBSX2D : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>, - Requires<[IsPPC4xx]>, isDOT; + Requires<[IsPPC4xx]>, isRecordForm; def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>; @@ -4406,10 +4483,10 @@ def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"mr. $rA, $rB", (OR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; -def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; +def : InstAlias<"not. $rA, $rB", (NOR8_rec g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>; @@ -4475,13 +4552,13 @@ def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm", (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm", (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; -def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm", +def SUBIC_rec : PPCAsmPseudo<"subic. $rA, $rB, $imm", (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; -def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; -def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; +def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8_rec g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>; def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; @@ -4534,109 +4611,109 @@ def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>, def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; -def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b", +def EXTLWI_rec : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; -def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b", +def EXTRWI_rec : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; -def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b", +def INSLWI_rec : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; -def INSRWIo : PPCAsmPseudo<"insrwi. 
$rA, $rS, $n, $b", +def INSRWI_rec : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; -def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n", +def ROTRWI_rec : PPCAsmPseudo<"rotrwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; -def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n", +def SLWI_rec : PPCAsmPseudo<"slwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; -def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n", +def SRWI_rec : PPCAsmPseudo<"srwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; -def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n", +def CLRRWI_rec : PPCAsmPseudo<"clrrwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n", (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; -def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n", +def CLRLSLWI_rec : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n", (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; -def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; +def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; -def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; +def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNM_rec gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; -def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; +def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINM_rec gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; -def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; +def : InstAlias<"cntlzw. $rA, $rS", (CNTLZW_rec gprc:$rA, gprc:$rS)>; // The POWER variant def : MnemonicAlias<"cntlz", "cntlzw">; def : MnemonicAlias<"cntlz.", "cntlzw.">; def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; -def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b", +def EXTLDI_rec : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; -def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b", +def EXTRDI_rec : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; -def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b", +def INSRDI_rec : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; -def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n", +def ROTRDI_rec : PPCAsmPseudo<"rotrdi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; -def SLDIo : PPCAsmPseudo<"sldi. 
$rA, $rS, $n", +def SLDI_rec : PPCAsmPseudo<"sldi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; -def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n", +def SRDI_rec : PPCAsmPseudo<"srdi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; -def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n", +def CLRRDI_rec : PPCAsmPseudo<"clrrdi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; -def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n", +def CLRLSLDI_rec : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>; def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; -def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; +def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; -def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; +def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCL_rec g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL_32_64 g8rc:$rA, gprc:$rS, 0, u6imm:$n)>; -def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICL_rec g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"lnia $RT", (ADDPCIS g8rc:$RT, 0)>; def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; -def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", +def RLWINMbm_rec : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; -def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", +def RLWIMIbm_rec : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; -def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", +def RLWNMbm_rec : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; // These generic branch instruction forms are used for the assembler parser only. 
@@ -4865,7 +4942,7 @@ let mayStore = 1 in def CP_PASTE : X_L1_RA5_RB5<31, 902, "paste" , gprc, IIC_LdStPASTE, []>; let mayStore = 1, Defs = [CR0] in -def CP_PASTEo : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isDOT; +def CP_PASTE_rec : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isRecordForm; def CP_COPYx : PPCAsmPseudo<"copy $rA, $rB" , (ins gprc:$rA, gprc:$rB)>; def CP_PASTEx : PPCAsmPseudo<"paste $rA, $rB", (ins gprc:$rA, gprc:$rB)>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 2aad5860d87f..be6b30ffa08b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -120,11 +120,11 @@ multiclass XX3Form_Rcr opcode, bits<7> xo, string asmbase, !strconcat(asmbase, !strconcat(" ", asmstr)), itin, [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>; let Defs = [CR6] in - def o : XX3Form_Rc, - isDOT; + isRecordForm; } } @@ -152,7 +152,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">; let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let hasSideEffects = 0 in { // VSX instructions don't have side effects. -let Uses = [RM] in { // Load indexed instructions let mayLoad = 1, mayStore = 0 in { @@ -214,6 +213,7 @@ let Uses = [RM] in { } } // mayStore + let Uses = [RM] in { // Add/Mul Instructions let isCommutable = 1 in { def XSADDDP : XX3Form<60, 32, @@ -1255,6 +1255,55 @@ def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))), } // AddedComplexity } // HasVSX +def FpMinMax { + dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC), + (COPY_TO_REGCLASS $B, VSFRC)), + VSSRC); + dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC), + (COPY_TO_REGCLASS $B, VSFRC)), + VSSRC); +} + +let AddedComplexity = 400, Predicates = [HasVSX] in { + // f32 Min. + def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)), + (f32 FpMinMax.F32Min)>; + def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)), + (f32 FpMinMax.F32Min)>; + def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Min)>; + def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Min)>; + // F32 Max. + def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)), + (f32 FpMinMax.F32Max)>; + def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)), + (f32 FpMinMax.F32Max)>; + def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Max)>; + def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))), + (f32 FpMinMax.F32Max)>; + + // f64 Min. + def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)), + (f64 (XSMINDP $A, $B))>; + def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)), + (f64 (XSMINDP $A, $B))>; + def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))), + (f64 (XSMINDP $A, $B))>; + def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), + (f64 (XSMINDP $A, $B))>; + // f64 Max. 
+ def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)), + (f64 (XSMAXDP $A, $B))>; + def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)), + (f64 (XSMAXDP $A, $B))>; + def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))), + (f64 (XSMAXDP $A, $B))>; + def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))), + (f64 (XSMAXDP $A, $B))>; +} + def ScalarLoads { dag Li8 = (i32 (extloadi8 xoaddr:$src)); dag ZELi8 = (i32 (zextloadi8 xoaddr:$src)); @@ -1330,7 +1379,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; // VSX scalar loads introduced in ISA 2.07 - let mayLoad = 1, mayStore = 0 in { + let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { let CodeSize = 3 in def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src), "lxsspx $XT, $src", IIC_LdStLFD, []>; @@ -1355,7 +1404,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. } // mayLoad // VSX scalar stores introduced in ISA 2.07 - let mayStore = 1, mayLoad = 0 in { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { let CodeSize = 3 in def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), "stxsspx $XT, $dst", IIC_LdStSTFD, []>; @@ -1912,7 +1961,7 @@ def VectorExtractions { - The order of elements after the move to GPR is reversed, so we invert the bits of the index prior to truncating to the range 0-7 */ - dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDIo8 $Idx, 8))); + dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8))); dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC)); dag BE_MV_VBYTE = (MFVSRD (EXTRACT_SUBREG @@ -1931,7 +1980,7 @@ def VectorExtractions { the bits of the index prior to truncating to the range 0-3 */ dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDIo8 $Idx, 4), 1, 62))); + (RLDICR (ANDI8_rec $Idx, 4), 1, 62))); dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC)); dag BE_MV_VHALF = (MFVSRD (EXTRACT_SUBREG @@ -1949,7 +1998,7 @@ def VectorExtractions { the bits of the index prior to truncating to the range 0-1 */ dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDIo8 $Idx, 2), 2, 61))); + (RLDICR (ANDI8_rec $Idx, 2), 2, 61))); dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC)); dag BE_MV_VWORD = (MFVSRD (EXTRACT_SUBREG @@ -1965,7 +2014,7 @@ def VectorExtractions { element indices. */ dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8, - (RLDICR (ANDIo8 $Idx, 1), 3, 60))); + (RLDICR (ANDI8_rec $Idx, 1), 3, 60))); dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC)); dag BE_VARIABLE_DWORD = (MFVSRD (EXTRACT_SUBREG @@ -2477,6 +2526,43 @@ def : Pat<(i64 (bitconvert f64:$S)), // (move to FPR, nothing else needed) def : Pat<(f64 (bitconvert i64:$S)), (f64 (MTVSRD $S))>; + +// Rounding to integer. 
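+// In the patterns below, lrint/llrint lower to fctid (convert to a signed
+// doubleword using the rounding mode currently set in the FPSCR) followed by
+// a direct move, while lround/llround first round to the nearest integer,
+// with ties away from zero, using xsrdpi before the conversion.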
+def : Pat<(i64 (lrint f64:$S)), + (i64 (MFVSRD (FCTID $S)))>; +def : Pat<(i64 (lrint f32:$S)), + (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; +def : Pat<(i64 (llrint f64:$S)), + (i64 (MFVSRD (FCTID $S)))>; +def : Pat<(i64 (llrint f32:$S)), + (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; +def : Pat<(i64 (lround f64:$S)), + (i64 (MFVSRD (FCTID (XSRDPI $S))))>; +def : Pat<(i64 (lround f32:$S)), + (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +def : Pat<(i64 (llround f64:$S)), + (i64 (MFVSRD (FCTID (XSRDPI $S))))>; +def : Pat<(i64 (llround f32:$S)), + (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +} + +let Predicates = [HasVSX] in { +// Rounding for single precision. +def : Pat<(f32 (fround f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPI + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (fnearbyint f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIC + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (ffloor f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIM + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (fceil f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIP + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; +def : Pat<(f32 (ftrunc f32:$S)), + (f32 (COPY_TO_REGCLASS (XSRDPIZ + (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; } // Materialize a zero-vector of long long @@ -2502,7 +2588,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] class X_VT5_XO5_VB5_Ro opcode, bits<5> xo2, bits<10> xo, string opc, list pattern> - : X_VT5_XO5_VB5, isDOT; + : X_VT5_XO5_VB5, isRecordForm; // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less), // So we use different operand class for VRB @@ -2520,7 +2606,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] class X_VT5_XO5_VB5_VSFR_Ro opcode, bits<5> xo2, bits<10> xo, string opc, list pattern> - : X_VT5_XO5_VB5_VSFR, isDOT; + : X_VT5_XO5_VB5_VSFR, isRecordForm; // [PO T XO B XO BX /] class XX2_RT5_XO5_XB6 opcode, bits<5> xo2, bits<9> xo, string opc, @@ -2550,7 +2636,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_Ro opcode, bits<10> xo, string opc, list pattern> - : X_VT5_VA5_VB5, isDOT; + : X_VT5_VA5_VB5, isRecordForm; // [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_FMA opcode, bits<10> xo, string opc, @@ -2562,7 +2648,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_FMA_Ro opcode, bits<10> xo, string opc, list pattern> - : X_VT5_VA5_VB5_FMA, isDOT; + : X_VT5_VA5_VB5_FMA, isRecordForm; //===--------------------------------------------------------------------===// // Quad-Precision Scalar Move Instructions: @@ -2884,13 +2970,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { //===--------------------------------------------------------------------===// // Maximum/Minimum Type-C/Type-J DP - // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT - def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc, - IIC_VecFP, []>; + def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc, + IIC_VecFP, + [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>; def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; - def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, 
"xsmincdp", vsrc, vsfrc, vsfrc, - IIC_VecFP, []>; + def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc, + IIC_VecFP, + [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>; def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc, IIC_VecFP, []>; @@ -2898,18 +2985,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Vector Byte-Reverse H/W/D/Q Word def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>; - def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, []>; - def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; + def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc, + [(set v4i32:$XT, (bswap v4i32:$XB))]>; + def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, + [(set v2i64:$XT, (bswap v2i64:$XB))]>; def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; // Vector Reverse - def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + def : Pat<(v8i16 (bswap v8i16 :$A)), (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; - def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), - (v4i32 (XXBRW $A))>; - def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), - (v2i64 (XXBRD $A))>; - def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + def : Pat<(v1i128 (bswap v1i128 :$A)), (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; // Vector Permute @@ -2927,7 +3012,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. - let mayLoad = 1, mayStore = 0 in { + let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), "lxv $XT, $src", IIC_LdStLFD, []>; @@ -2972,7 +3057,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. 
- let mayStore = 1, mayLoad = 0 in { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), "stxv $XT, $dst", IIC_LdStSTFD, []>; @@ -3697,6 +3782,15 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(f128 (fpextend f32:$src)), (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; + def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)), + (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC), + (COPY_TO_REGCLASS $XB, VSSRC)), + VSSRC))>; + def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)), + (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC), + (COPY_TO_REGCLASS $XB, VSSRC)), + VSSRC))>; + } // end HasP9Vector, AddedComplexity let AddedComplexity = 400 in { @@ -3710,7 +3804,7 @@ let AddedComplexity = 400 in { } } -let Predicates = [HasP9Vector] in { +let Predicates = [HasP9Vector], hasSideEffects = 0 in { let mayStore = 1 in { def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), (ins spilltovsrrc:$XT, memrr:$dst), @@ -3865,8 +3959,20 @@ def DblToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A))))); } +// FP load dags (for f32 -> v4f32) +def LoadFP { + dag A = (f32 (load xoaddr:$A)); + dag B = (f32 (load xoaddr:$B)); + dag C = (f32 (load xoaddr:$C)); + dag D = (f32 (load xoaddr:$D)); +} + // FP merge dags (for f32 -> v4f32) def MrgFP { + dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC); + dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC); + dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC); + dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC); dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $C, VSRC), 0)); dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), @@ -4022,7 +4128,18 @@ let AddedComplexity = 400 in { (v2f64 (XXPERMDI (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC), 0))>; - + // Using VMRGEW to assemble the final vector would be a lower latency + // solution. However, we choose to go with the slightly higher latency + // XXPERMDI for 2 reasons: + // 1. This is likely to occur in unrolled loops where regpressure is high, + // so we want to use the latter as it has access to all 64 VSX registers. + // 2. Using Altivec instructions in this sequence would likely cause the + // allocation of Altivec registers even for the loads which in turn would + // force the use of LXSIWZX for the loads, adding a cycle of latency to + // each of the loads which would otherwise be able to use LFIWZX. + def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)), + (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B), + (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)), (VMRGEW MrgFP.AC, MrgFP.BD)>; def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, @@ -4089,7 +4206,18 @@ let AddedComplexity = 400 in { (v2f64 (XXPERMDI (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $A, VSRC), 0))>; - + // Using VMRGEW to assemble the final vector would be a lower latency + // solution. However, we choose to go with the slightly higher latency + // XXPERMDI for 2 reasons: + // 1. This is likely to occur in unrolled loops where regpressure is high, + // so we want to use the latter as it has access to all 64 VSX registers. + // 2. 
Using Altivec instructions in this sequence would likely cause the + // allocation of Altivec registers even for the loads which in turn would + // force the use of LXSIWZX for the loads, adding a cycle of latency to + // each of the loads which would otherwise be able to use LFIWZX. + def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)), + (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C), + (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>; def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)), (VMRGEW MrgFP.AC, MrgFP.BD)>; def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp new file mode 100644 index 000000000000..b761f337533b --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -0,0 +1,894 @@ +//===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to prepare loops for ppc preferred addressing +// modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with +// update) +// Additional PHIs are created for loop induction variables used by load/store +// instructions so that preferred addressing modes can be used. +// +// 1: DS/DQ form preparation, prepare the load/store instructions so that they +// can satisfy the DS/DQ form displacement requirements. +// Generically, this means transforming loops like this: +// for (int i = 0; i < n; ++i) { +// unsigned long x1 = *(unsigned long *)(p + i + 5); +// unsigned long x2 = *(unsigned long *)(p + i + 9); +// } +// +// to look like this: +// +// unsigned NewP = p + 5; +// for (int i = 0; i < n; ++i) { +// unsigned long x1 = *(unsigned long *)(i + NewP); +// unsigned long x2 = *(unsigned long *)(i + NewP + 4); +// } +// +// 2: D/DS form with update preparation, prepare the load/store instructions so +// that we can use update form to do pre-increment. 
+// Generically, this means transforming loops like this:
+//  for (int i = 0; i < n; ++i)
+//    array[i] = c;
+//
+// to look like this:
+//
+//  T *p = array[-1];
+//  for (int i = 0; i < n; ++i)
+//    *++p = c;
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-loop-instr-form-prep"
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+// By default, we limit this to creating 16 common bases out of loops per
+// function. 16 is a little over half of the allocatable register set.
+static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
+                                 cl::Hidden, cl::init(16),
+  cl::desc("Potential common base number threshold per function for PPC loop "
+           "prep"));
+
+static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
+                                 cl::init(true), cl::Hidden,
+  cl::desc("prefer update form when ds form is also an update form"));
+
+// The sum of the following three per-loop thresholds across all loops cannot
+// be larger than MaxVarsPrep.
+// By default, we limit this to creating 9 PHIs for one loop.
+// 9 in total, and 3 for each kind of preparation, are experimental values
+// tuned on Power9.
+static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
+                                 cl::Hidden, cl::init(3),
+  cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
+           "form"));
+
+static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
+                                 cl::Hidden, cl::init(3),
+  cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
+
+static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
+                                 cl::Hidden, cl::init(3),
+  cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
+
+
+// It would not be profitable if the common base has only one load/store: ISEL
+// should already be able to choose the best load/store form based on the
+// offset for a single load/store. Set the minimal profitable value default to
+// 2 and make it an option.
+static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
+                                 cl::Hidden, cl::init(2),
+  cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
+           "preparation"));
+
+STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
+STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
+STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
+STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
+STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
+STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
+
+namespace {
+  struct BucketElement {
+    BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+    BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
+
+    const SCEVConstant *Offset;
+    Instruction *Instr;
+  };
+
+  struct Bucket {
+    Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+                                            Elements(1, BucketElement(I)) {}
+
+    const SCEV *BaseSCEV;
+    SmallVector<BucketElement, 16> Elements;
+  };
+
+  // "UpdateForm" is not a real PPC instruction form; it stands for a D-form
+  // load/store with update, like ldu/stdu, or the prefetch intrinsic.
+  // For DS form instructions, their displacements must be a multiple of 4.
+  // For DQ form instructions, their displacements must be a multiple of 16.
+  enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
+
+  class PPCLoopInstrFormPrep : public FunctionPass {
+  public:
+    static char ID; // Pass ID, replacement for typeid
+
+    PPCLoopInstrFormPrep() : FunctionPass(ID) {
+      initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
+    }
+
+    PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+      initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addRequired<LoopInfoWrapperPass>();
+      AU.addPreserved<LoopInfoWrapperPass>();
+      AU.addRequired<ScalarEvolutionWrapperPass>();
+    }
+
+    bool runOnFunction(Function &F) override;
+
+  private:
+    PPCTargetMachine *TM = nullptr;
+    const PPCSubtarget *ST;
+    DominatorTree *DT;
+    LoopInfo *LI;
+    ScalarEvolution *SE;
+    bool PreserveLCSSA;
+
+    /// Number of successful preparations for update/DS/DQ form in all
+    /// innermost loops. Each successful preparation hoists one common base
+    /// out of the loop, which may increase register pressure, much like LICM
+    /// does. Make sure the total preparation count can be controlled by an
+    /// option.
+    unsigned SuccPrepCount;
+
+    bool runOnLoop(Loop *L);
+
+    /// Check if the required PHI node already exists in Loop \p L.
+    bool alreadyPrepared(Loop *L, Instruction *MemI,
+                         const SCEV *BasePtrStartSCEV,
+                         const SCEVConstant *BasePtrIncSCEV,
+                         InstrForm Form);
+
+    /// Collect candidates in Loop \p L that satisfy the given condition
+    /// (\p isValidCandidate() returns true).
+    SmallVector<Bucket, 16>
+    collectCandidates(Loop *L,
+                      std::function<bool(const Instruction *, const Value *)>
+                          isValidCandidate,
+                      unsigned MaxCandidateNum);
+
+    /// Add a candidate to the candidate \p Buckets.
+    void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+                         SmallVector<Bucket, 16> &Buckets,
+                         unsigned MaxCandidateNum);
+
+    /// Prepare all candidates in \p Buckets for the update form.
+    bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);
+
+    /// Prepare all candidates in \p Buckets for a displacement form,
+    /// currently DS/DQ.
+    bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
+                      InstrForm Form);
+
+    /// Prepare one chain \p BucketChain: find the best base element and
+    /// update all other elements in \p BucketChain accordingly.
+    /// \p Form is used to find the best base element.
+    /// On success, the best base element is stored as the first element of
+    /// \p BucketChain.
+    /// Return false if no base element is found, otherwise return true.
+    bool prepareBaseForDispFormChain(Bucket &BucketChain,
+                                     InstrForm Form);
+
+    /// Prepare one chain \p BucketChain: find the best base element and
+    /// update all other elements in \p BucketChain accordingly.
+    /// On success, the best base element is stored as the first element of
+    /// \p BucketChain.
+    /// Return false if no base element is found, otherwise return true.
+    bool prepareBaseForUpdateFormChain(Bucket &BucketChain);
+
+    /// Rewrite load/store instructions in \p BucketChain according to the
+    /// preparation.
+    bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
+                           SmallSet<BasicBlock *, 16> &BBChanged,
+                           InstrForm Form);
+  };
+
+} // end anonymous namespace
+
+char PPCLoopInstrFormPrep::ID = 0;
+static const char *name = "Prepare loop for ppc preferred instruction forms";
+INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)
+
+static const std::string PHINodeNameSuffix = ".phi";
+static const std::string CastNodeNameSuffix = ".cast";
+static const std::string GEPNodeIncNameSuffix = ".inc";
+static const std::string GEPNodeOffNameSuffix = ".off";
+
+FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) {
+  return new PPCLoopInstrFormPrep(TM);
+}
+
+static bool IsPtrInBounds(Value *BasePtr) {
+  Value *StrippedBasePtr = BasePtr;
+  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
+    StrippedBasePtr = BC->getOperand(0);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
+    return GEP->isInBounds();
+
+  return false;
+}
+
+static std::string getInstrName(const Value *I, const std::string Suffix) {
+  assert(I && "Invalid parameter!");
+  if (I->hasName())
+    return (I->getName() + Suffix).str();
+  else
+    return "";
+}
+
+static Value *GetPointerOperand(Value *MemI) {
+  if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
+    return LMemI->getPointerOperand();
+  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
+    return SMemI->getPointerOperand();
+  } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
+    if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
+      return IMemI->getArgOperand(0);
+  }
+
+  return nullptr;
+}
+
+bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+  ST = TM ? TM->getSubtargetImpl(F) : nullptr;
+  SuccPrepCount = 0;
+
+  bool MadeChange = false;
+
+  for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+    for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+      MadeChange |= runOnLoop(*L);
+
+  return MadeChange;
+}
+
+void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
+                                           SmallVector<Bucket, 16> &Buckets,
+                                           unsigned MaxCandidateNum) {
+  assert((MemI && GetPointerOperand(MemI)) &&
+         "Candidate should be a memory instruction.");
+  assert(LSCEV && "Invalid SCEV for Ptr value.");
+  bool FoundBucket = false;
+  for (auto &B : Buckets) {
+    const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+    if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+      B.Elements.push_back(BucketElement(CDiff, MemI));
+      FoundBucket = true;
+      break;
+    }
+  }
+
+  if (!FoundBucket) {
+    if (Buckets.size() == MaxCandidateNum)
+      return;
+    Buckets.push_back(Bucket(LSCEV, MemI));
+  }
+}
+
+SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
+    Loop *L,
+    std::function<bool(const Instruction *, const Value *)> isValidCandidate,
+    unsigned MaxCandidateNum) {
+  SmallVector<Bucket, 16> Buckets;
+  for (const auto &BB : L->blocks())
+    for (auto &J : *BB) {
+      Value *PtrValue;
+      Instruction *MemI;
+
+      if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) {
+        MemI = LMemI;
+        PtrValue = LMemI->getPointerOperand();
+      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) {
+        MemI = SMemI;
+        PtrValue = SMemI->getPointerOperand();
+      } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
+        if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
+          MemI = IMemI;
+          PtrValue = IMemI->getArgOperand(0);
+        } else continue;
+      } else continue;
+
+      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
+      if (PtrAddrSpace)
+        continue;
+
+      if (L->isLoopInvariant(PtrValue))
+        continue;
+
+      const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+      const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
+      if (!LARSCEV || LARSCEV->getLoop() != L)
+        continue;
+
+      if (isValidCandidate(&J, PtrValue))
+        addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum);
+    }
+  return Buckets;
+}
+
+bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
+                                                       InstrForm Form) {
+  // RemainderOffsetInfo details:
+  // key:            value of (Offset urem DispConstraint). For DSForm, it can
+  //                 be [0, 4).
+  // first of pair:  the index of the first BucketElement whose remainder is
+  //                 equal to key. For key 0, this value must be 0.
+  // second of pair: number of load/stores with the same remainder.
+  DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo;
+
+  for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
+    if (!BucketChain.Elements[j].Offset)
+      RemainderOffsetInfo[0] = std::make_pair(0, 1);
+    else {
+      unsigned Remainder =
+          BucketChain.Elements[j].Offset->getAPInt().urem(Form);
+      if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
+        RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
+      else
+        RemainderOffsetInfo[Remainder].second++;
+    }
+  }
+  // Currently we choose the most profitable base as the one with the max
+  // number of load/stores having the same remainder.
+  // FIXME: adjust the base selection strategy according to the load/store
+  // offset distribution.
+  // For example, if we have one candidate chain for DS form preparation,
+  // which contains the following load/stores with different remainders:
+  //   1: 10 load/stores whose remainder is 1;
+  //   2: 9 load/stores whose remainder is 2;
+  //   3: 1 for remainder 3 and 0 for remainder 0;
+  // Now we will choose the first load/store whose remainder is 1 as the base
+  // and adjust all other load/stores according to the new base, so we will
+  // get 10 DS form and 10 X form.
+  // But we should be more clever: for this case we could use two bases, one
+  // for remainder 1 and the other for remainder 2; then we could get 19 DS
+  // form and 1 X form.
+  unsigned MaxCountRemainder = 0;
+  for (unsigned j = 0; j < (unsigned)Form; j++)
+    if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
+        RemainderOffsetInfo[j].second >
+            RemainderOffsetInfo[MaxCountRemainder].second)
+      MaxCountRemainder = j;
+
+  // Abort when there are too few instructions with a common base.
+  if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)
+    return false;
+
+  // If the first value is the most profitable, there is no need to adjust the
+  // BucketChain elements, as the first value was already subtracted from them
+  // during collection.
+  if (MaxCountRemainder == 0)
+    return true;
+
+  // Adjust the load/stores to the newly chosen base.
+  const SCEV *Offset =
+      BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;
+  BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
+  for (auto &E : BucketChain.Elements) {
+    if (E.Offset)
+      E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+    else
+      E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+  }
+
+  std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],
+            BucketChain.Elements[0]);
+  return true;
+}
+
+// FIXME: implement a more clever base choosing policy.
+// Currently we always choose an existing load/store offset. This may lead to
+// suboptimal code sequences. For example, for one DS chain with offsets
+// {-32769, 2003, 2007, 2011}, we choose -32769 as the base offset, and the
+// remaining displacements for the load/stores are {0, 34772, 34776, 34780}.
+// Though each offset is now a multiple of 4, it cannot be represented in a
+// signed 16-bit field.
+bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
+  // We have a choice now of which instruction's memory operand we use as the
+  // base for the generated PHI. Always picking the first instruction in each
+  // bucket does not work well, specifically because that instruction might
+  // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+  // the choice is somewhat arbitrary, because the backend will happily
+  // generate direct offsets from both the pre-incremented and
+  // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+  // instruction in each bucket, and adjust the recurrence and other offsets
+  // accordingly.
+  for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {
+    if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))
+      if (II->getIntrinsicID() == Intrinsic::prefetch)
+        continue;
+
+    // If we'd otherwise pick the first element anyway, there's nothing to do.
+    if (j == 0)
+      break;
+
+    // If our chosen element has no offset from the base pointer, there's
+    // nothing to do.
+    if (!BucketChain.Elements[j].Offset ||
+        BucketChain.Elements[j].Offset->isZero())
+      break;
+
+    const SCEV *Offset = BucketChain.Elements[j].Offset;
+    BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);
+    for (auto &E : BucketChain.Elements) {
+      if (E.Offset)
+        E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+      else
+        E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+    }
+
+    std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);
+    break;
+  }
+  return true;
+}
+
+bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
+                                             SmallSet<BasicBlock *, 16> &BBChanged,
+                                             InstrForm Form) {
+  bool MadeChange = false;
+  const SCEVAddRecExpr *BasePtrSCEV =
+      cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
+  if (!BasePtrSCEV->isAffine())
+    return MadeChange;
+
+  LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+
+  assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
+
+  // The instruction corresponding to the Bucket's BaseSCEV must be the first
+  // in the vector of elements.
+  Instruction *MemI = BucketChain.Elements.begin()->Instr;
+  Value *BasePtr = GetPointerOperand(MemI);
+  assert(BasePtr && "No pointer operand");
+
+  Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
+  Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
+                                     BasePtr->getType()->getPointerAddressSpace());
+
+  if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
+    return MadeChange;
+
+  const SCEVConstant *BasePtrIncSCEV =
+      dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
+  if (!BasePtrIncSCEV)
+    return MadeChange;
+
+  // Some DS form load/store instructions also have an update form; this is
+  // the case when the stride is a multiple of 4. Use the update form if it is
+  // preferred.
+  bool CanPreInc = (Form == UpdateForm ||
+                    ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
+                     PreferUpdateForm));
+  const SCEV *BasePtrStartSCEV = nullptr;
+  if (CanPreInc)
+    BasePtrStartSCEV =
+        SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
+  else
+    BasePtrStartSCEV = BasePtrSCEV->getStart();
+
+  if (!isSafeToExpand(BasePtrStartSCEV, *SE))
+    return MadeChange;
+
+  if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
+    return MadeChange;
+
+  LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+  BasicBlock *Header = L->getHeader();
+  unsigned HeaderLoopPredCount = pred_size(Header);
+  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+  PHINode *NewPHI =
+      PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+                      getInstrName(MemI, PHINodeNameSuffix),
+                      Header->getFirstNonPHI());
+
+  SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
+  Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+                                            LoopPredecessor->getTerminator());
+
+  // Note that LoopPredecessor might occur in the predecessor list multiple
+  // times, and we need to add it the right number of times.
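+  // (A block can appear more than once when, for example, it ends in a
+  // switch with several cases branching to the loop header; the PHI needs
+  // one incoming value per edge.)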
+  for (auto PI : predecessors(Header)) {
+    if (PI != LoopPredecessor)
+      continue;
+
+    NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+  }
+
+  Instruction *PtrInc = nullptr;
+  Instruction *NewBasePtr = nullptr;
+  if (CanPreInc) {
+    Instruction *InsPoint = &*Header->getFirstInsertionPt();
+    PtrInc = GetElementPtrInst::Create(
+        I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+        getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+    cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+    for (auto PI : predecessors(Header)) {
+      if (PI == LoopPredecessor)
+        continue;
+
+      NewPHI->addIncoming(PtrInc, PI);
+    }
+    if (PtrInc->getType() != BasePtr->getType())
+      NewBasePtr = new BitCastInst(
+          PtrInc, BasePtr->getType(),
+          getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+    else
+      NewBasePtr = PtrInc;
+  } else {
+    // Note that LoopPredecessor might occur in the predecessor list multiple
+    // times; its incoming values were already added above, so make sure the
+    // PHI gets no further incoming values for it here.
+    for (auto PI : predecessors(Header)) {
+      if (PI == LoopPredecessor)
+        continue;
+
+      // For the latch predecessor, we need to insert a GEP just before the
+      // terminator to increase the address.
+      BasicBlock *BB = PI;
+      Instruction *InsPoint = BB->getTerminator();
+      PtrInc = GetElementPtrInst::Create(
+          I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
+          getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
+
+      cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+
+      NewPHI->addIncoming(PtrInc, PI);
+    }
+    PtrInc = NewPHI;
+    if (NewPHI->getType() != BasePtr->getType())
+      NewBasePtr =
+          new BitCastInst(NewPHI, BasePtr->getType(),
+                          getInstrName(NewPHI, CastNodeNameSuffix),
+                          &*Header->getFirstInsertionPt());
+    else
+      NewBasePtr = NewPHI;
+  }
+
+  if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
+    BBChanged.insert(IDel->getParent());
+  BasePtr->replaceAllUsesWith(NewBasePtr);
+  RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
+
+  // Keep track of the replacement pointer values we've inserted so that we
+  // don't generate more pointer values than necessary.
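+  // (Several bucket elements can share one pointer operand; once a
+  // replacement for that operand exists, it is reused rather than emitting a
+  // redundant GEP.)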
+  SmallPtrSet<Value *, 16> NewPtrs;
+  NewPtrs.insert(NewBasePtr);
+
+  for (auto I = std::next(BucketChain.Elements.begin()),
+            IE = BucketChain.Elements.end(); I != IE; ++I) {
+    Value *Ptr = GetPointerOperand(I->Instr);
+    assert(Ptr && "No pointer operand");
+    if (NewPtrs.count(Ptr))
+      continue;
+
+    Instruction *RealNewPtr;
+    if (!I->Offset || I->Offset->getValue()->isZero()) {
+      RealNewPtr = NewBasePtr;
+    } else {
+      Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+      if (PtrIP && isa<Instruction>(NewBasePtr) &&
+          cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+        PtrIP = nullptr;
+      else if (PtrIP && isa<PHINode>(PtrIP))
+        PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
+      else if (!PtrIP)
+        PtrIP = I->Instr;
+
+      GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+          I8Ty, PtrInc, I->Offset->getValue(),
+          getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP);
+      if (!PtrIP)
+        NewPtr->insertAfter(cast<Instruction>(PtrInc));
+      NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+      RealNewPtr = NewPtr;
+    }
+
+    if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+      BBChanged.insert(IDel->getParent());
+
+    Instruction *ReplNewPtr;
+    if (Ptr->getType() != RealNewPtr->getType()) {
+      ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+                                   getInstrName(Ptr, CastNodeNameSuffix));
+      ReplNewPtr->insertAfter(RealNewPtr);
+    } else
+      ReplNewPtr = RealNewPtr;
+
+    Ptr->replaceAllUsesWith(ReplNewPtr);
+    RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+
+    NewPtrs.insert(RealNewPtr);
+  }
+
+  MadeChange = true;
+
+  SuccPrepCount++;
+
+  if (Form == DSForm && !CanPreInc)
+    DSFormChainRewritten++;
+  else if (Form == DQForm)
+    DQFormChainRewritten++;
+  else if (Form == UpdateForm || (Form == DSForm && CanPreInc))
+    UpdFormChainRewritten++;
+
+  return MadeChange;
+}
+
+bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
+                                          SmallVector<Bucket, 16> &Buckets) {
+  bool MadeChange = false;
+  if (Buckets.empty())
+    return MadeChange;
+  SmallSet<BasicBlock *, 16> BBChanged;
+  for (auto &Bucket : Buckets)
+    // The base address of each bucket is transformed into a PHI and the other
+    // elements are rewritten based on the new base.
+    if (prepareBaseForUpdateFormChain(Bucket))
+      MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
+
+  if (MadeChange)
+    for (auto &BB : L->blocks())
+      if (BBChanged.count(BB))
+        DeleteDeadPHIs(BB);
+  return MadeChange;
+}
+
+bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
+                                        InstrForm Form) {
+  bool MadeChange = false;
+
+  if (Buckets.empty())
+    return MadeChange;
+
+  SmallSet<BasicBlock *, 16> BBChanged;
+  for (auto &Bucket : Buckets) {
+    if (Bucket.Elements.size() < DispFormPrepMinThreshold)
+      continue;
+    if (prepareBaseForDispFormChain(Bucket, Form))
+      MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);
+  }
+
+  if (MadeChange)
+    for (auto &BB : L->blocks())
+      if (BBChanged.count(BB))
+        DeleteDeadPHIs(BB);
+  return MadeChange;
+}
+
+// In order to prepare for the preferred instruction form, a PHI is added.
+// This function checks to see if that PHI already exists and returns true if
+// it found an existing PHI with a start and increment matching the PHI that
+// we wanted to create.
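+// For instance, an earlier update-form preparation leaves a header PHI of
+// roughly this shape (illustrative IR; the value names are invented):
+//   %p.phi = phi i8* [ %p.start, %preheader ], [ %p.inc, %latch ]
+//   %p.inc = getelementptr i8, i8* %p.phi, i64 <step>
+// A candidate whose start and step SCEVs match such a PHI needs no new one.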
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
+                                           const SCEV *BasePtrStartSCEV,
+                                           const SCEVConstant *BasePtrIncSCEV,
+                                           InstrForm Form) {
+  BasicBlock *BB = MemI->getParent();
+  if (!BB)
+    return false;
+
+  BasicBlock *PredBB = L->getLoopPredecessor();
+  BasicBlock *LatchBB = L->getLoopLatch();
+
+  if (!PredBB || !LatchBB)
+    return false;
+
+  // Run through the PHIs and see if we have any that look like a preparation.
+  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
+  for (auto &CurrentPHI : PHIIter) {
+    PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
+    if (!CurrentPHINode)
+      continue;
+
+    if (!SE->isSCEVable(CurrentPHINode->getType()))
+      continue;
+
+    const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
+
+    const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+    if (!PHIBasePtrSCEV)
+      continue;
+
+    const SCEVConstant *PHIBasePtrIncSCEV =
+        dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));
+    if (!PHIBasePtrIncSCEV)
+      continue;
+
+    if (CurrentPHINode->getNumIncomingValues() == 2) {
+      if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
+           CurrentPHINode->getIncomingBlock(1) == PredBB) ||
+          (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
+           CurrentPHINode->getIncomingBlock(0) == PredBB)) {
+        if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
+          // The existing PHI (CurrentPHINode) has the same start and
+          // increment as the PHI that we wanted to create.
+          if (Form == UpdateForm &&
+              PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
+            ++PHINodeAlreadyExistsUpdate;
+            return true;
+          }
+          if (Form == DSForm || Form == DQForm) {
+            const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
+                SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
+            if (Diff && !Diff->getAPInt().urem(Form)) {
+              if (Form == DSForm)
+                ++PHINodeAlreadyExistsDS;
+              else
+                ++PHINodeAlreadyExistsDQ;
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
+  bool MadeChange = false;
+
+  // Only prepare the innermost loop.
+  if (!L->empty())
+    return MadeChange;
+
+  // Return if we have already done enough preparation.
+  if (SuccPrepCount >= MaxVarsPrep)
+    return MadeChange;
+
+  LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
+  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+  // If there is no loop predecessor, or the loop predecessor's terminator
+  // returns a value (which might contribute to determining the loop's
+  // iteration space), insert a new preheader for the loop.
+  if (!LoopPredecessor ||
+      !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
+    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+    if (LoopPredecessor)
+      MadeChange = true;
+  }
+  if (!LoopPredecessor) {
+    LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
+    return MadeChange;
+  }
+  // Check if a load/store has an update form. This lambda is used by
+  // collectCandidates, which collects candidates for the form defined by the
+  // given lambda.
+  auto isUpdateFormCandidate = [&] (const Instruction *I,
+                                    const Value *PtrValue) {
+    assert((PtrValue && I) && "Invalid parameter!");
+    // There are no update forms for Altivec vector load/stores.
+    if (ST && ST->hasAltivec() &&
+        PtrValue->getType()->getPointerElementType()->isVectorTy())
+      return false;
+    // See getPreIndexedAddressParts: the displacement for LDU/STDU has to be
+    // a multiple of 4 (DS-form). For i64 loads/stores whose displacement fits
+    // in a 16-bit signed field but is not a multiple of 4, this pre-increment
+    // preparation would be useless and could even break an otherwise
+    // well-formed addressing mode.
+    if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
+      const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);
+      const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);
+      if (!LARSCEV || LARSCEV->getLoop() != L)
+        return false;
+      if (const SCEVConstant *StepConst =
+              dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
+        const APInt &ConstInt = StepConst->getValue()->getValue();
+        if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
+          return false;
+      }
+    }
+    return true;
+  };
+
+  // Check if a load/store has the DS form.
+  auto isDSFormCandidate = [] (const Instruction *I, const Value *PtrValue) {
+    assert((PtrValue && I) && "Invalid parameter!");
+    if (isa<IntrinsicInst>(I))
+      return false;
+    Type *PointerElementType = PtrValue->getType()->getPointerElementType();
+    return (PointerElementType->isIntegerTy(64)) ||
+           (PointerElementType->isFloatTy()) ||
+           (PointerElementType->isDoubleTy()) ||
+           (PointerElementType->isIntegerTy(32) &&
+            llvm::any_of(I->users(),
+                         [](const User *U) { return isa<SExtInst>(U); }));
+  };
+
+  // Check if a load/store has the DQ form.
+  auto isDQFormCandidate = [&] (const Instruction *I, const Value *PtrValue) {
+    assert((PtrValue && I) && "Invalid parameter!");
+    return !isa<IntrinsicInst>(I) && ST && ST->hasP9Vector() &&
+           (PtrValue->getType()->getPointerElementType()->isVectorTy());
+  };
+
+  // Collect buckets of comparable addresses used by loads, stores and the
+  // prefetch intrinsic for the update form.
+  SmallVector<Bucket, 16> UpdateFormBuckets =
+      collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm);
+
+  // Prepare for the update form.
+  if (!UpdateFormBuckets.empty())
+    MadeChange |= updateFormPrep(L, UpdateFormBuckets);
+
+  // Collect buckets of comparable addresses used by loads and stores for the
+  // DS form.
+  SmallVector<Bucket, 16> DSFormBuckets =
+      collectCandidates(L, isDSFormCandidate, MaxVarsDSForm);
+
+  // Prepare for the DS form.
+  if (!DSFormBuckets.empty())
+    MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
+
+  // Collect buckets of comparable addresses used by loads and stores for the
+  // DQ form.
+  SmallVector<Bucket, 16> DQFormBuckets =
+      collectCandidates(L, isDQFormCandidate, MaxVarsDQForm);
+
+  // Prepare for the DQ form.
+  if (!DQFormBuckets.empty())
+    MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
+
+  return MadeChange;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
deleted file mode 100644
index d252cfbd26b1..000000000000
--- a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ /dev/null
@@ -1,605 +0,0 @@
-//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass to prepare loops for pre-increment addressing
-// modes. Additional PHIs are created for loop induction variables used by
-// load/store instructions so that the pre-increment forms can be used.
-// Generically, this means transforming loops like this: -// for (int i = 0; i < n; ++i) -// array[i] = c; -// to look like this: -// T *p = array[-1]; -// for (int i = 0; i < n; ++i) -// *++p = c; -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ppc-loop-preinc-prep" - -#include "PPC.h" -#include "PPCSubtarget.h" -#include "PPCTargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include -#include -#include - -using namespace llvm; - -// By default, we limit this to creating 16 PHIs (which is a little over half -// of the allocatable register set). -static cl::opt MaxVars("ppc-preinc-prep-max-vars", - cl::Hidden, cl::init(16), - cl::desc("Potential PHI threshold for PPC preinc loop prep")); - -STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); -STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten"); - -namespace { - struct BucketElement { - BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} - BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} - - const SCEVConstant *Offset; - Instruction *Instr; - }; - - struct Bucket { - Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), - Elements(1, BucketElement(I)) {} - - const SCEV *BaseSCEV; - SmallVector Elements; - }; - - class PPCLoopPreIncPrep : public FunctionPass { - public: - static char ID; // Pass ID, replacement for typeid - - PPCLoopPreIncPrep() : FunctionPass(ID) { - initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); - } - - PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { - initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - } - - bool runOnFunction(Function &F) override; - - private: - PPCTargetMachine *TM = nullptr; - const PPCSubtarget *ST; - DominatorTree *DT; - LoopInfo *LI; - ScalarEvolution *SE; - bool PreserveLCSSA; - - bool runOnLoop(Loop *L); - - /// Check if required PHI node is already exist in Loop \p L. - bool alreadyPrepared(Loop *L, Instruction* MemI, - const SCEV *BasePtrStartSCEV, - const SCEVConstant *BasePtrIncSCEV); - - /// Collect condition matched(\p isValidCandidate() returns true) - /// candidates in Loop \p L. - SmallVector - collectCandidates(Loop *L, - std::function - isValidCandidate, - unsigned MaxCandidateNum); - - /// Add a candidate to candidates \p Buckets. 
- void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, - SmallVector &Buckets, - unsigned MaxCandidateNum); - - /// Prepare all candidates in \p Buckets for update form. - bool updateFormPrep(Loop *L, SmallVector &Buckets); - - /// Prepare for one chain \p BucketChain, find the best base element and - /// update all other elements in \p BucketChain accordingly. - bool prepareBaseForUpdateFormChain(Bucket &BucketChain); - - /// Rewrite load/store instructions in \p BucketChain according to - /// preparation. - bool rewriteLoadStores(Loop *L, Bucket &BucketChain, - SmallSet &BBChanged); - }; - -} // end anonymous namespace - -char PPCLoopPreIncPrep::ID = 0; -static const char *name = "Prepare loop for pre-inc. addressing modes"; -INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) - -static const std::string PHINodeNameSuffix = ".phi"; -static const std::string CastNodeNameSuffix = ".cast"; -static const std::string GEPNodeIncNameSuffix = ".inc"; -static const std::string GEPNodeOffNameSuffix = ".off"; - -FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { - return new PPCLoopPreIncPrep(TM); -} - -static bool IsPtrInBounds(Value *BasePtr) { - Value *StrippedBasePtr = BasePtr; - while (BitCastInst *BC = dyn_cast(StrippedBasePtr)) - StrippedBasePtr = BC->getOperand(0); - if (GetElementPtrInst *GEP = dyn_cast(StrippedBasePtr)) - return GEP->isInBounds(); - - return false; -} - -static std::string getInstrName(const Value *I, const std::string Suffix) { - assert(I && "Invalid paramater!"); - if (I->hasName()) - return (I->getName() + Suffix).str(); - else - return ""; -} - -static Value *GetPointerOperand(Value *MemI) { - if (LoadInst *LMemI = dyn_cast(MemI)) { - return LMemI->getPointerOperand(); - } else if (StoreInst *SMemI = dyn_cast(MemI)) { - return SMemI->getPointerOperand(); - } else if (IntrinsicInst *IMemI = dyn_cast(MemI)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) - return IMemI->getArgOperand(0); - } - - return nullptr; -} - -bool PPCLoopPreIncPrep::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis().getSE(); - auto *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); - ST = TM ? 
TM->getSubtargetImpl(F) : nullptr; - - bool MadeChange = false; - - for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) - for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) - MadeChange |= runOnLoop(*L); - - return MadeChange; -} - -void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, - SmallVector &Buckets, - unsigned MaxCandidateNum) { - assert((MemI && GetPointerOperand(MemI)) && - "Candidate should be a memory instruction."); - assert(LSCEV && "Invalid SCEV for Ptr value."); - bool FoundBucket = false; - for (auto &B : Buckets) { - const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); - if (const auto *CDiff = dyn_cast(Diff)) { - B.Elements.push_back(BucketElement(CDiff, MemI)); - FoundBucket = true; - break; - } - } - - if (!FoundBucket) { - if (Buckets.size() == MaxCandidateNum) - return; - Buckets.push_back(Bucket(LSCEV, MemI)); - } -} - -SmallVector PPCLoopPreIncPrep::collectCandidates( - Loop *L, - std::function isValidCandidate, - unsigned MaxCandidateNum) { - SmallVector Buckets; - for (const auto &BB : L->blocks()) - for (auto &J : *BB) { - Value *PtrValue; - Instruction *MemI; - - if (LoadInst *LMemI = dyn_cast(&J)) { - MemI = LMemI; - PtrValue = LMemI->getPointerOperand(); - } else if (StoreInst *SMemI = dyn_cast(&J)) { - MemI = SMemI; - PtrValue = SMemI->getPointerOperand(); - } else if (IntrinsicInst *IMemI = dyn_cast(&J)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { - MemI = IMemI; - PtrValue = IMemI->getArgOperand(0); - } else continue; - } else continue; - - unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); - if (PtrAddrSpace) - continue; - - if (L->isLoopInvariant(PtrValue)) - continue; - - const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); - const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV); - if (!LARSCEV || LARSCEV->getLoop() != L) - continue; - - if (isValidCandidate(&J, PtrValue)) - addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum); - } - return Buckets; -} - -// TODO: implement a more clever base choosing policy. -// Currently we always choose an exist load/store offset. This maybe lead to -// suboptimal code sequences. For example, for one DS chain with offsets -// {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp -// for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a -// multipler of 4, it cannot be represented by sint16. -bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { - // We have a choice now of which instruction's memory operand we use as the - // base for the generated PHI. Always picking the first instruction in each - // bucket does not work well, specifically because that instruction might - // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, - // the choice is somewhat arbitrary, because the backend will happily - // generate direct offsets from both the pre-incremented and - // post-incremented pointer values. Thus, we'll pick the first non-prefetch - // instruction in each bucket, and adjust the recurrence and other offsets - // accordingly. - for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { - if (auto *II = dyn_cast(BucketChain.Elements[j].Instr)) - if (II->getIntrinsicID() == Intrinsic::prefetch) - continue; - - // If we'd otherwise pick the first element anyway, there's nothing to do. - if (j == 0) - break; - - // If our chosen element has no offset from the base pointer, there's - // nothing to do. 
- if (!BucketChain.Elements[j].Offset || - BucketChain.Elements[j].Offset->isZero()) - break; - - const SCEV *Offset = BucketChain.Elements[j].Offset; - BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); - for (auto &E : BucketChain.Elements) { - if (E.Offset) - E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); - else - E.Offset = cast(SE->getNegativeSCEV(Offset)); - } - - std::swap(BucketChain.Elements[j], BucketChain.Elements[0]); - break; - } - return true; -} - -bool PPCLoopPreIncPrep::rewriteLoadStores( - Loop *L, Bucket &BucketChain, SmallSet &BBChanged) { - bool MadeChange = false; - const SCEVAddRecExpr *BasePtrSCEV = - cast(BucketChain.BaseSCEV); - if (!BasePtrSCEV->isAffine()) - return MadeChange; - - LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); - - assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); - - // The instruction corresponding to the Bucket's BaseSCEV must be the first - // in the vector of elements. - Instruction *MemI = BucketChain.Elements.begin()->Instr; - Value *BasePtr = GetPointerOperand(MemI); - assert(BasePtr && "No pointer operand"); - - Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); - Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), - BasePtr->getType()->getPointerAddressSpace()); - - const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); - if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) - return MadeChange; - - const SCEVConstant *BasePtrIncSCEV = - dyn_cast(BasePtrSCEV->getStepRecurrence(*SE)); - if (!BasePtrIncSCEV) - return MadeChange; - BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); - if (!isSafeToExpand(BasePtrStartSCEV, *SE)) - return MadeChange; - - if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) - return MadeChange; - - LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); - - BasicBlock *Header = L->getHeader(); - unsigned HeaderLoopPredCount = pred_size(Header); - BasicBlock *LoopPredecessor = L->getLoopPredecessor(); - - PHINode *NewPHI = - PHINode::Create(I8PtrTy, HeaderLoopPredCount, - getInstrName(MemI, PHINodeNameSuffix), - Header->getFirstNonPHI()); - - SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); - Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, - LoopPredecessor->getTerminator()); - - // Note that LoopPredecessor might occur in the predecessor list multiple - // times, and we need to add it the right number of times. 
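The base-swapping loop in prepareBaseForUpdateFormChain() reduces to simple offset arithmetic. A minimal sketch, assuming integer offsets in place of SCEVConstants (the helper name is invented):

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Offset models the constant distance from the bucket base; IsPrefetch
// marks elements that cannot become the base (no pre-increment dcbt).
struct Elem { int64_t Offset; bool IsPrefetch; };

void rebaseOnFirstNonPrefetch(int64_t &Base, std::vector<Elem> &Elems) {
  for (size_t J = 0; J != Elems.size(); ++J) {
    if (Elems[J].IsPrefetch)
      continue;
    if (J == 0 || Elems[J].Offset == 0)
      break;                       // already a usable base, nothing to do
    int64_t Off = Elems[J].Offset;
    Base += Off;                   // BaseSCEV = BaseSCEV + Offset
    for (Elem &E : Elems)
      E.Offset -= Off;             // rebase every element
    std::swap(Elems[J], Elems[0]); // the chosen element becomes the base
    break;
  }
}

int main() {
  int64_t Base = 1000;
  std::vector<Elem> Elems = {{0, true}, {16, false}, {32, false}};
  rebaseOnFirstNonPrefetch(Base, Elems);
  // The prefetch at offset 0 is skipped; the base moves to 1016 and all
  // offsets are re-expressed relative to the new base.
  assert(Base == 1016 && Elems[0].Offset == 0 && Elems[1].Offset == -16);
  return 0;
}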
- for (const auto &PI : predecessors(Header)) { - if (PI != LoopPredecessor) - continue; - - NewPHI->addIncoming(BasePtrStart, LoopPredecessor); - } - - Instruction *InsPoint = &*Header->getFirstInsertionPt(); - GetElementPtrInst *PtrInc = GetElementPtrInst::Create( - I8Ty, NewPHI, BasePtrIncSCEV->getValue(), - getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); - PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); - for (const auto &PI : predecessors(Header)) { - if (PI == LoopPredecessor) - continue; - - NewPHI->addIncoming(PtrInc, PI); - } - - Instruction *NewBasePtr; - if (PtrInc->getType() != BasePtr->getType()) - NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), - getInstrName(PtrInc, CastNodeNameSuffix), InsPoint); - else - NewBasePtr = PtrInc; - - if (Instruction *IDel = dyn_cast(BasePtr)) - BBChanged.insert(IDel->getParent()); - BasePtr->replaceAllUsesWith(NewBasePtr); - RecursivelyDeleteTriviallyDeadInstructions(BasePtr); - - // Keep track of the replacement pointer values we've inserted so that we - // don't generate more pointer values than necessary. - SmallPtrSet NewPtrs; - NewPtrs.insert(NewBasePtr); - - for (auto I = std::next(BucketChain.Elements.begin()), - IE = BucketChain.Elements.end(); I != IE; ++I) { - Value *Ptr = GetPointerOperand(I->Instr); - assert(Ptr && "No pointer operand"); - if (NewPtrs.count(Ptr)) - continue; - - Instruction *RealNewPtr; - if (!I->Offset || I->Offset->getValue()->isZero()) { - RealNewPtr = NewBasePtr; - } else { - Instruction *PtrIP = dyn_cast(Ptr); - if (PtrIP && isa(NewBasePtr) && - cast(NewBasePtr)->getParent() == PtrIP->getParent()) - PtrIP = nullptr; - else if (PtrIP && isa(PtrIP)) - PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); - else if (!PtrIP) - PtrIP = I->Instr; - - GetElementPtrInst *NewPtr = GetElementPtrInst::Create( - I8Ty, PtrInc, I->Offset->getValue(), - getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP); - if (!PtrIP) - NewPtr->insertAfter(cast(PtrInc)); - NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); - RealNewPtr = NewPtr; - } - - if (Instruction *IDel = dyn_cast(Ptr)) - BBChanged.insert(IDel->getParent()); - - Instruction *ReplNewPtr; - if (Ptr->getType() != RealNewPtr->getType()) { - ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), - getInstrName(Ptr, CastNodeNameSuffix)); - ReplNewPtr->insertAfter(RealNewPtr); - } else - ReplNewPtr = RealNewPtr; - - Ptr->replaceAllUsesWith(ReplNewPtr); - RecursivelyDeleteTriviallyDeadInstructions(Ptr); - - NewPtrs.insert(RealNewPtr); - } - - MadeChange = true; - UpdFormChainRewritten++; - - return MadeChange; -} - -bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, - SmallVector &Buckets) { - bool MadeChange = false; - if (Buckets.empty()) - return MadeChange; - SmallSet BBChanged; - for (auto &Bucket : Buckets) - // The base address of each bucket is transformed into a phi and the others - // are rewritten based on new base. - if (prepareBaseForUpdateFormChain(Bucket)) - MadeChange |= rewriteLoadStores(L, Bucket, BBChanged); - if (MadeChange) - for (auto &BB : L->blocks()) - if (BBChanged.count(BB)) - DeleteDeadPHIs(BB); - return MadeChange; -} - -// In order to prepare for the pre-increment a PHI is added. -// This function will check to see if that PHI already exists and will return -// true if it found an existing PHI with the same start and increment as the -// one we wanted to create. 
-bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, - const SCEV *BasePtrStartSCEV, - const SCEVConstant *BasePtrIncSCEV) { - BasicBlock *BB = MemI->getParent(); - if (!BB) - return false; - - BasicBlock *PredBB = L->getLoopPredecessor(); - BasicBlock *LatchBB = L->getLoopLatch(); - - if (!PredBB || !LatchBB) - return false; - - // Run through the PHIs and see if we have some that looks like a preparation - iterator_range PHIIter = BB->phis(); - for (auto & CurrentPHI : PHIIter) { - PHINode *CurrentPHINode = dyn_cast(&CurrentPHI); - if (!CurrentPHINode) - continue; - - if (!SE->isSCEVable(CurrentPHINode->getType())) - continue; - - const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); - - const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast(PHISCEV); - if (!PHIBasePtrSCEV) - continue; - - const SCEVConstant *PHIBasePtrIncSCEV = - dyn_cast(PHIBasePtrSCEV->getStepRecurrence(*SE)); - if (!PHIBasePtrIncSCEV) - continue; - - if (CurrentPHINode->getNumIncomingValues() == 2) { - if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && - CurrentPHINode->getIncomingBlock(1) == PredBB) || - (CurrentPHINode->getIncomingBlock(1) == LatchBB && - CurrentPHINode->getIncomingBlock(0) == PredBB)) { - if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV && - PHIBasePtrIncSCEV == BasePtrIncSCEV) { - // The existing PHI (CurrentPHINode) has the same start and increment - // as the PHI that we wanted to create. - ++PHINodeAlreadyExists; - return true; - } - } - } - } - return false; -} - -bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { - bool MadeChange = false; - - // Only prep. the inner-most loop - if (!L->empty()) - return MadeChange; - - LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); - - BasicBlock *LoopPredecessor = L->getLoopPredecessor(); - // If there is no loop predecessor, or the loop predecessor's terminator - // returns a value (which might contribute to determining the loop's - // iteration space), insert a new preheader for the loop. - if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { - LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); - if (LoopPredecessor) - MadeChange = true; - } - if (!LoopPredecessor) { - LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n"); - return MadeChange; - } - - // Check if a load/store has update form. This lambda is used by function - // collectCandidates which can collect candidates for types defined by lambda. - auto isUpdateFormCandidate = [&] (const Instruction *I, - const Value *PtrValue) { - assert((PtrValue && I) && "Invalid parameter!"); - // There are no update forms for Altivec vector load/stores. - if (ST && ST->hasAltivec() && - PtrValue->getType()->getPointerElementType()->isVectorTy()) - return false; - // See getPreIndexedAddressParts, the displacement for LDU/STDU has to - // be 4's multiple (DS-form). For i64 loads/stores when the displacement - // fits in a 16-bit signed field but isn't a multiple of 4, it will be - // useless and possible to break some original well-form addressing mode - // to make this pre-inc prep for it. 
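The DS-form restriction described in the comment above can be checked in isolation. A hypothetical helper, not part of the pass, capturing the same test:

#include <cassert>
#include <cstdint>

// An i64 step that fits in a signed 16-bit field but is not 4-aligned
// cannot be encoded DS-form, so preparing it would only break an
// existing well-formed addressing mode.
bool isProfitableI64Step(int64_t Step) {
  bool FitsSInt16 = Step >= INT16_MIN && Step <= INT16_MAX;
  return !(FitsSInt16 && Step % 4 != 0);
}

int main() {
  assert(isProfitableI64Step(8));       // fits and is a multiple of 4
  assert(!isProfitableI64Step(2003));   // fits but is not 4-aligned
  assert(isProfitableI64Step(34772));   // too big for 16 bits anyway
  return 0;
}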
- if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { - const SCEV *LSCEV = SE->getSCEVAtScope(const_cast(PtrValue), L); - const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV); - if (!LARSCEV || LARSCEV->getLoop() != L) - return false; - if (const SCEVConstant *StepConst = - dyn_cast(LARSCEV->getStepRecurrence(*SE))) { - const APInt &ConstInt = StepConst->getValue()->getValue(); - if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) - return false; - } - } - return true; - }; - - // Collect buckets of comparable addresses used by loads, stores and prefetch - // intrinsic for update form. - SmallVector UpdateFormBuckets = - collectCandidates(L, isUpdateFormCandidate, MaxVars); - - // Prepare for update form. - if (!UpdateFormBuckets.empty()) - MadeChange |= updateFormPrep(L, UpdateFormBuckets); - - return MadeChange; -} diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp new file mode 100644 index 000000000000..83cca11b27a3 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp @@ -0,0 +1,164 @@ +//===-- PPCLowerMASSVEntries.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering of MASSV (SIMD) entries for specific PowerPC +// subtargets. +// Following is an example of a conversion specific to Power9 subtarget: +// __sind2_massv ---> __sind2_P9 +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#define DEBUG_TYPE "ppc-lower-massv-entries" + +using namespace llvm; + +namespace { + +// Length of the suffix "massv", which is specific to IBM MASSV library entries. +const unsigned MASSVSuffixLength = 5; + +static StringRef MASSVFuncs[] = { +#define TLI_DEFINE_MASSV_VECFUNCS_NAMES +#include "llvm/Analysis/VecFuncs.def" +}; + +class PPCLowerMASSVEntries : public ModulePass { +public: + static char ID; + + PPCLowerMASSVEntries() : ModulePass(ID) {} + + bool runOnModule(Module &M) override; + + StringRef getPassName() const override { return "PPC Lower MASS Entries"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + +private: + static bool isMASSVFunc(StringRef Name); + static StringRef getCPUSuffix(const PPCSubtarget *Subtarget); + static std::string createMASSVFuncName(Function &Func, + const PPCSubtarget *Subtarget); + bool lowerMASSVCall(CallInst *CI, Function &Func, Module &M, + const PPCSubtarget *Subtarget); +}; + +} // namespace + +/// Checks if the specified function name represents an entry in the MASSV +/// library. +bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) { + auto Iter = std::find(std::begin(MASSVFuncs), std::end(MASSVFuncs), Name); + return Iter != std::end(MASSVFuncs); +} + +// FIXME: +/// Returns a string corresponding to the specified PowerPC subtarget. e.g.: +/// "P8" for Power8, "P9" for Power9. The string is used as a suffix while +/// generating subtarget-specific MASSV library functions. Current support +/// includes Power8 and Power9 subtargets. 
+StringRef PPCLowerMASSVEntries::getCPUSuffix(const PPCSubtarget *Subtarget) {
+  // Assume Power8 when Subtarget is unavailable.
+  if (!Subtarget)
+    return "P8";
+  if (Subtarget->hasP9Vector())
+    return "P9";
+  if (Subtarget->hasP8Vector())
+    return "P8";
+
+  report_fatal_error("Unsupported Subtarget: MASSV is supported only on "
+                     "Power8 and Power9 subtargets.");
+}
+
+/// Creates a PowerPC subtarget-specific name corresponding to the specified
+/// generic MASSV function and the given PowerPC subtarget.
+std::string
+PPCLowerMASSVEntries::createMASSVFuncName(Function &Func,
+                                          const PPCSubtarget *Subtarget) {
+  StringRef Suffix = getCPUSuffix(Subtarget);
+  auto GenericName = Func.getName().drop_back(MASSVSuffixLength).str();
+  std::string MASSVEntryName = GenericName + Suffix.str();
+  return MASSVEntryName;
+}
+
+/// Lowers generic MASSV entries to PowerPC subtarget-specific MASSV entries.
+/// e.g.: __sind2_massv --> __sind2_P9 for a Power9 subtarget.
+/// Both function prototypes and their callsites are updated during lowering.
+bool PPCLowerMASSVEntries::lowerMASSVCall(CallInst *CI, Function &Func,
+                                          Module &M,
+                                          const PPCSubtarget *Subtarget) {
+  if (CI->use_empty())
+    return false;
+
+  std::string MASSVEntryName = createMASSVFuncName(Func, Subtarget);
+  FunctionCallee FCache = M.getOrInsertFunction(
+      MASSVEntryName, Func.getFunctionType(), Func.getAttributes());
+
+  CallSite CS(CI);
+  CI->setCalledFunction(FCache);
+
+  return true;
+}
+
+bool PPCLowerMASSVEntries::runOnModule(Module &M) {
+  bool Changed = false;
+
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC)
+    return Changed;
+
+  auto &TM = TPC->getTM<PPCTargetMachine>();
+  const PPCSubtarget *Subtarget;
+
+  for (Function &Func : M) {
+    if (!Func.isDeclaration())
+      continue;
+
+    if (!isMASSVFunc(Func.getName()))
+      continue;
+
+    // A call to lowerMASSVCall() invalidates the iterator over users when it
+    // replaces them, so precompute the current list of users to be able to
+    // rewrite all of the call sites.
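The renaming itself is just string surgery on the "massv" suffix. A self-contained sketch of createMASSVFuncName()'s effect, using plain std::string in place of StringRef:

#include <cassert>
#include <cstddef>
#include <string>

// Drop the trailing "massv" (MASSVSuffixLength == 5) and append the
// subtarget suffix produced by getCPUSuffix().
std::string massvEntryName(const std::string &Generic, const std::string &Cpu) {
  const std::size_t SuffixLen = 5; // strlen("massv")
  assert(Generic.size() > SuffixLen);
  return Generic.substr(0, Generic.size() - SuffixLen) + Cpu;
}

int main() {
  assert(massvEntryName("__sind2_massv", "P9") == "__sind2_P9");
  assert(massvEntryName("__logd2_massv", "P8") == "__logd2_P8");
  return 0;
}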
+ SmallVector MASSVUsers; + for (auto *User: Func.users()) + MASSVUsers.push_back(User); + + for (auto *User : MASSVUsers) { + auto *CI = dyn_cast(User); + if (!CI) + continue; + + Subtarget = &TM.getSubtarget(*CI->getParent()->getParent()); + Changed |= lowerMASSVCall(CI, Func, M, Subtarget); + } + } + + return Changed; +} + +char PPCLowerMASSVEntries::ID = 0; + +char &llvm::PPCLowerMASSVEntriesID = PPCLowerMASSVEntries::ID; + +INITIALIZE_PASS(PPCLowerMASSVEntries, DEBUG_TYPE, "Lower MASSV entries", false, + false) + +ModulePass *llvm::createPPCLowerMASSVEntriesPass() { + return new PPCLowerMASSVEntries(); +} diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index ac8ac060f460..74192cb20cd0 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -18,6 +18,8 @@ // //===---------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" @@ -26,12 +28,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" -#include "MCTargetDesc/PPCPredicates.h" using namespace llvm; @@ -160,33 +162,33 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op, static unsigned getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) { unsigned Opcode = MI->getOpcode(); - if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || - Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo) + if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec) return MI->getOperand(3).getImm(); - if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && - MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm()) + if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) && + MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm()) return MI->getOperand(3).getImm(); - if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || - Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec || Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && - MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) + MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) return 32 + MI->getOperand(3).getImm(); - if (Opcode == PPC::ANDIo) { + if (Opcode == PPC::ANDI_rec) { uint16_t Imm = MI->getOperand(2).getImm(); return 48 + countLeadingZeros(Imm); } - if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || - Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec || Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8) // The result ranges from 0 to 32. return 58; - if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || - Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo) + if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec) // The result ranges from 0 to 64. 
return 57; @@ -331,108 +333,121 @@ bool PPCMIPeephole::simplifyCode(void) { // is identified by an immediate value of 0 or 3. int Immed = MI.getOperand(3).getImm(); - if (Immed != 1) { - - // For each of these simplifications, we need the two source - // regs to match. Unfortunately, MachineCSE ignores COPY and - // SUBREG_TO_REG, so for example we can see - // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. - // We have to look through chains of COPY and SUBREG_TO_REG - // to find the real source values for comparison. - unsigned TrueReg1 = - TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); - unsigned TrueReg2 = - TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); - - if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) { - MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); - unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0; - - // If this is a splat fed by a splatting load, the splat is - // redundant. Replace with a copy. This doesn't happen directly due - // to code in PPCDAGToDAGISel.cpp, but it can happen when converting - // a load of a double to a vector of 64-bit integers. - auto isConversionOfLoadAndSplat = [=]() -> bool { - if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) - return false; - unsigned DefReg = - TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - if (Register::isVirtualRegister(DefReg)) { - MachineInstr *LoadMI = MRI->getVRegDef(DefReg); - if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) - return true; - } - return false; - }; - if (DefMI && (Immed == 0 || Immed == 3)) { - if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) { - LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat " - "to load-and-splat/copy: "); - LLVM_DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), - MI.getOperand(0).getReg()) - .add(MI.getOperand(1)); - ToErase = &MI; - Simplified = true; - } - } + if (Immed == 1) + break; - // If this is a splat or a swap fed by another splat, we - // can replace it with a copy. - if (DefOpc == PPC::XXPERMDI) { - unsigned FeedImmed = DefMI->getOperand(3).getImm(); - unsigned FeedReg1 = - TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - unsigned FeedReg2 = - TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); - - if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { - LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " - "to splat/copy: "); - LLVM_DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), - MI.getOperand(0).getReg()) - .add(MI.getOperand(1)); - ToErase = &MI; - Simplified = true; - } - - // If this is a splat fed by a swap, we can simplify modify - // the splat to splat the other value from the swap's input - // parameter. - else if ((Immed == 0 || Immed == 3) - && FeedImmed == 2 && FeedReg1 == FeedReg2) { - LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); - LLVM_DEBUG(MI.dump()); - MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); - MI.getOperand(2).setReg(DefMI->getOperand(2).getReg()); - MI.getOperand(3).setImm(3 - Immed); - Simplified = true; - } - - // If this is a swap fed by a swap, we can replace it - // with a copy from the first swap's input. 
- else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { - LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); - LLVM_DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), - MI.getOperand(0).getReg()) - .add(DefMI->getOperand(1)); - ToErase = &MI; - Simplified = true; - } - } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && - (DefMI->getOperand(2).getImm() == 0 || - DefMI->getOperand(2).getImm() == 3)) { - // Splat fed by another splat - switch the output of the first - // and remove the second. - DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); - ToErase = &MI; - Simplified = true; - LLVM_DEBUG(dbgs() << "Removing redundant splat: "); - LLVM_DEBUG(MI.dump()); - } + // For each of these simplifications, we need the two source + // regs to match. Unfortunately, MachineCSE ignores COPY and + // SUBREG_TO_REG, so for example we can see + // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. + // We have to look through chains of COPY and SUBREG_TO_REG + // to find the real source values for comparison. + unsigned TrueReg1 = + TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); + unsigned TrueReg2 = + TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); + + if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1))) + break; + + MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); + + if (!DefMI) + break; + + unsigned DefOpc = DefMI->getOpcode(); + + // If this is a splat fed by a splatting load, the splat is + // redundant. Replace with a copy. This doesn't happen directly due + // to code in PPCDAGToDAGISel.cpp, but it can happen when converting + // a load of a double to a vector of 64-bit integers. + auto isConversionOfLoadAndSplat = [=]() -> bool { + if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) + return false; + unsigned FeedReg1 = + TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); + if (Register::isVirtualRegister(FeedReg1)) { + MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1); + if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) + return true; + } + return false; + }; + if ((Immed == 0 || Immed == 3) && + (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) { + LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat " + "to load-and-splat/copy: "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; + } + + // If this is a splat or a swap fed by another splat, we + // can replace it with a copy. + if (DefOpc == PPC::XXPERMDI) { + unsigned DefReg1 = DefMI->getOperand(1).getReg(); + unsigned DefReg2 = DefMI->getOperand(2).getReg(); + unsigned DefImmed = DefMI->getOperand(3).getImm(); + + // If the two inputs are not the same register, check to see if + // they originate from the same virtual register after only + // copy-like instructions. 
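These XXPERMDI rewrites can be sanity-checked with a two-doubleword model. A sketch, assuming the usual DM-immediate semantics when both inputs are the same register (imm 0/3 splat an element, imm 2 swaps, imm 1 is the identity):

#include <array>
#include <cassert>
#include <cstdint>

using V2 = std::array<uint64_t, 2>;  // two doublewords of a VSX register

// DM bit 1 picks the doubleword taken from A, DM bit 0 the one from B.
V2 xxpermdi(const V2 &A, const V2 &B, unsigned Imm) {
  return {A[(Imm >> 1) & 1], B[Imm & 1]};
}

int main() {
  V2 S = {0x1111, 0x2222};
  V2 Swapped = xxpermdi(S, S, 2);
  // Splat fed by a swap: splat the other element, i.e. imm becomes 3-imm.
  assert(xxpermdi(Swapped, Swapped, 0) == xxpermdi(S, S, 3 - 0));
  assert(xxpermdi(Swapped, Swapped, 3) == xxpermdi(S, S, 3 - 3));
  // Swap fed by a swap: a plain copy of the original input.
  assert(xxpermdi(Swapped, Swapped, 2) == S);
  return 0;
}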
+ if (DefReg1 != DefReg2) { + unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI); + unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI); + + if (!(FeedReg1 == FeedReg2 && + Register::isVirtualRegister(FeedReg1))) + break; + } + + if (DefImmed == 0 || DefImmed == 3) { + LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " + "to splat/copy: "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; } + + // If this is a splat fed by a swap, we can simplify modify + // the splat to splat the other value from the swap's input + // parameter. + else if ((Immed == 0 || Immed == 3) && DefImmed == 2) { + LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); + LLVM_DEBUG(MI.dump()); + MI.getOperand(1).setReg(DefReg1); + MI.getOperand(2).setReg(DefReg2); + MI.getOperand(3).setImm(3 - Immed); + Simplified = true; + } + + // If this is a swap fed by a swap, we can replace it + // with a copy from the first swap's input. + else if (Immed == 2 && DefImmed == 2) { + LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(DefMI->getOperand(1)); + ToErase = &MI; + Simplified = true; + } + } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && + (DefMI->getOperand(2).getImm() == 0 || + DefMI->getOperand(2).getImm() == 3)) { + // Splat fed by another splat - switch the output of the first + // and remove the second. + DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + LLVM_DEBUG(dbgs() << "Removing redundant splat: "); + LLVM_DEBUG(MI.dump()); } break; } @@ -805,6 +820,153 @@ bool PPCMIPeephole::simplifyCode(void) { combineSEXTAndSHL(MI, ToErase); break; } + case PPC::RLWINM: + case PPC::RLWINM_rec: + case PPC::RLWINM8: + case PPC::RLWINM8_rec: { + unsigned FoldingReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(FoldingReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); + if (SrcMI->getOpcode() != PPC::RLWINM && + SrcMI->getOpcode() != PPC::RLWINM_rec && + SrcMI->getOpcode() != PPC::RLWINM8 && + SrcMI->getOpcode() != PPC::RLWINM8_rec) + break; + assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && + MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && + SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && + "Invalid PPC::RLWINM Instruction!"); + uint64_t SHSrc = SrcMI->getOperand(2).getImm(); + uint64_t SHMI = MI.getOperand(2).getImm(); + uint64_t MBSrc = SrcMI->getOperand(3).getImm(); + uint64_t MBMI = MI.getOperand(3).getImm(); + uint64_t MESrc = SrcMI->getOperand(4).getImm(); + uint64_t MEMI = MI.getOperand(4).getImm(); + + assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && + "Invalid PPC::RLWINM Instruction!"); + + // If MBMI is bigger than MEMI, we always can not get run of ones. + // RotatedSrcMask non-wrap: + // 0........31|32........63 + // RotatedSrcMask: B---E B---E + // MaskMI: -----------|--E B------ + // Result: ----- --- (Bad candidate) + // + // RotatedSrcMask wrap: + // 0........31|32........63 + // RotatedSrcMask: --E B----|--E B---- + // MaskMI: -----------|--E B------ + // Result: --- -----|--- ----- (Bad candidate) + // + // One special case is RotatedSrcMask is a full set mask. 
+      // RotatedSrcMask full:
+      //                 0........31|32........63
+      // RotatedSrcMask: ------EB---|-------EB---
+      // MaskMI:         -----------|--E  B------
+      // Result:         -----------|---  ------- (Good candidate)
+
+      // Mark special case.
+      bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+      // For other MBMI > MEMI cases, just return.
+      if ((MBMI > MEMI) && !SrcMaskFull)
+        break;
+
+      // Handle MBMI <= MEMI cases.
+      APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+      // In MI we only need the low 32 bits of SrcMI, so only consider the
+      // low 32 bits of SrcMI's mask. Note that in APInt the lowest bit is at
+      // index 0, while in the PowerPC ISA the lowest bit is at index 63.
+      APInt MaskSrc =
+          APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+      // The current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is
+      // equal to hiBit.
+      // If MBSrc - MESrc == 1, we expect a full set mask instead of a null
+      // one.
+      if (SrcMaskFull && (MBSrc - MESrc == 1))
+        MaskSrc.setAllBits();
+
+      APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+      APInt FinalMask = RotatedSrcMask & MaskMI;
+      uint32_t NewMB, NewME;
+
+      // If the final mask is 0, the result of MI must be 0 too.
+      if (FinalMask.isNullValue()) {
+        bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
+                        MI.getOpcode() == PPC::RLWINM8_rec);
+
+        Simplified = true;
+
+        LLVM_DEBUG(dbgs() << "Replace Instr: ");
+        LLVM_DEBUG(MI.dump());
+
+        if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+          // Replace MI with "LI 0"
+          MI.RemoveOperand(4);
+          MI.RemoveOperand(3);
+          MI.RemoveOperand(2);
+          MI.getOperand(1).ChangeToImmediate(0);
+          MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
+        } else {
+          // Replace MI with "ANDI_rec reg, 0"
+          MI.RemoveOperand(4);
+          MI.RemoveOperand(3);
+          MI.getOperand(2).setImm(0);
+          MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+          MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+          if (SrcMI->getOperand(1).isKill()) {
+            MI.getOperand(1).setIsKill(true);
+            SrcMI->getOperand(1).setIsKill(false);
+          } else
+            // About to replace MI.getOperand(1), clear its kill flag.
+            MI.getOperand(1).setIsKill(false);
+        }
+
+        LLVM_DEBUG(dbgs() << "With: ");
+        LLVM_DEBUG(MI.dump());
+      } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+                              NewME) && NewMB <= NewME) || SrcMaskFull) {
+        // We only handle the MBMI <= MEMI case here, so NewMB must be no
+        // bigger than NewME. Otherwise we would get a 64-bit value after
+        // folding, while MI returns a 32-bit value.
+
+        Simplified = true;
+        LLVM_DEBUG(dbgs() << "Converting Instr: ");
+        LLVM_DEBUG(MI.dump());
+
+        uint16_t NewSH = (SHSrc + SHMI) % 32;
+        MI.getOperand(2).setImm(NewSH);
+        // If the SrcMI mask is full, there is no need to update MBMI and
+        // MEMI.
+        if (!SrcMaskFull) {
+          MI.getOperand(3).setImm(NewMB);
+          MI.getOperand(4).setImm(NewME);
+        }
+        MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+        if (SrcMI->getOperand(1).isKill()) {
+          MI.getOperand(1).setIsKill(true);
+          SrcMI->getOperand(1).setIsKill(false);
+        } else
+          // About to replace MI.getOperand(1), clear its kill flag.
+          MI.getOperand(1).setIsKill(false);
+
+        LLVM_DEBUG(dbgs() << "To: ");
+        LLVM_DEBUG(MI.dump());
+      }
+      if (Simplified) {
+        // If FoldingReg has no non-debug use and no implicit def (i.e. it is
+        // not RLWINM_rec or RLWINM8_rec), it is safe to delete its def SrcMI.
+        // Otherwise keep it.
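The folding arithmetic above can be verified on host integers. A standalone check of the full-source-mask special case, where only the rotation amounts compose (helper names here are ad hoc, not LLVM APIs):

#include <cassert>
#include <cstdint>

// PowerPC numbers bits from the MSB, so a MB..ME mask may wrap around;
// rotation is a plain 32-bit left rotate.
uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

uint32_t mbmeMask(unsigned MB, unsigned ME) {
  uint32_t FromMB = 0xFFFFFFFFu >> MB;         // bits MB..31 set
  uint32_t UpToME = 0xFFFFFFFFu << (31 - ME);  // bits 0..ME set
  return MB <= ME ? (FromMB & UpToME) : (FromMB | UpToME);
}

// rlwinm rd, rs, SH, MB, ME  ==  rotl32(rs, SH) & mbmeMask(MB, ME)
uint32_t rlwinm(uint32_t RS, unsigned SH, unsigned MB, unsigned ME) {
  return rotl32(RS, SH) & mbmeMask(MB, ME);
}

int main() {
  // SrcMaskFull case: when the inner mask covers all 32 bits, the two
  // rotations compose, NewSH = (SHSrc + SHMI) % 32, and the outer mask is
  // kept as-is, exactly as in the transformation above.
  uint32_t X = 0xDEADBEEFu;
  for (unsigned SHSrc = 0; SHSrc < 32; ++SHSrc)
    for (unsigned SHMI = 0; SHMI < 32; ++SHMI)
      assert(rlwinm(rlwinm(X, SHSrc, 0, 31), SHMI, 8, 23) ==
             rlwinm(X, (SHSrc + SHMI) % 32, 8, 23));
  return 0;
}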
+ ++NumRotatesCollapsed; + if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) { + ToErase = SrcMI; + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(SrcMI->dump()); + } + } + break; + } } } diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index dfae19804d94..2b341b5952c8 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -42,7 +42,7 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current /// function. This is only valid after the initial scan of the function by /// PEI. - bool MustSaveLR; + bool MustSaveLR = false; /// MustSaveTOC - Indicates that the TOC save needs to be performed in the /// prologue of the function. This is typically the case when there are diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index b1c0433641dd..a4b4bf2973d1 100644 --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -35,6 +35,8 @@ STATISTIC(NumRemovedInPreEmit, "Number of instructions deleted in pre-emit peephole"); STATISTIC(NumberOfSelfCopies, "Number of self copy instructions eliminated"); +STATISTIC(NumFrameOffFoldInPreEmit, + "Number of folding frame offset by using r+r in pre-emit peephole"); static cl::opt RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), @@ -161,8 +163,19 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) + if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { + // Remove UNENCODED_NOP even when this pass is disabled. + // This needs to be done unconditionally so we don't emit zeros + // in the instruction stream. + SmallVector InstrsToErase; + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + if (MI.getOpcode() == PPC::UNENCODED_NOP) + InstrsToErase.push_back(&MI); + for (MachineInstr *MI : InstrsToErase) + MI->eraseFromParent(); return false; + } bool Changed = false; const PPCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -171,6 +184,10 @@ namespace { Changed |= removeRedundantLIs(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); + if (Opc == PPC::UNENCODED_NOP) { + InstrsToErase.push_back(&MI); + continue; + } // Detect self copies - these can result from running AADB. 
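The UNENCODED_NOP cleanup above uses the usual collect-then-erase idiom so the walk never erases under its own iterator. The same shape on plain containers (opcodes invented):

#include <cassert>
#include <list>
#include <vector>

enum Opcode { UNENCODED_NOP, ADD, STORE };

// Offending "instructions" are collected first and erased afterwards.
void removeUnencodedNops(std::list<Opcode> &Block) {
  std::vector<std::list<Opcode>::iterator> ToErase;
  for (auto It = Block.begin(); It != Block.end(); ++It)
    if (*It == UNENCODED_NOP)
      ToErase.push_back(It);
  for (auto It : ToErase)
    Block.erase(It);  // list::erase only invalidates the erased iterator
}

int main() {
  std::list<Opcode> Block = {ADD, UNENCODED_NOP, STORE, UNENCODED_NOP};
  removeUnencodedNops(Block);
  assert(Block.size() == 2 && Block.front() == ADD && Block.back() == STORE);
  return 0;
}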
if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { const MCInstrDesc &MCID = TII->get(Opc); @@ -202,6 +219,12 @@ namespace { InstrsToErase.push_back(DefMIToErase); } } + if (TII->foldFrameOffset(MI)) { + Changed = true; + NumFrameOffFoldInPreEmit++; + LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); + LLVM_DEBUG(MI.dump()); + } } // Eliminate conditional branch based on a constant CR bit by diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 3b71ed219c17..90cc81beb89d 100644 --- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -375,10 +376,10 @@ public: }; private: - const PPCInstrInfo *TII; - MachineFunction *MF; - MachineRegisterInfo *MRI; - const MachineBranchProbabilityInfo *MBPI; + const PPCInstrInfo *TII = nullptr; + MachineFunction *MF = nullptr; + MachineRegisterInfo *MRI = nullptr; + const MachineBranchProbabilityInfo *MBPI = nullptr; // A vector to contain all the CR logical operations SmallVector AllCRLogicalOps; @@ -470,21 +471,21 @@ PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) { } else { MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(), Ret.SubregDef1, Ret.CopyDefs.first); + assert(Def1 && "Must be able to find a definition of operand 1."); Ret.DefsSingleUse &= MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg()); Ret.DefsSingleUse &= MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg()); - assert(Def1 && "Must be able to find a definition of operand 1."); if (isBinary(MIParam)) { Ret.IsBinary = 1; MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(), Ret.SubregDef2, Ret.CopyDefs.second); + assert(Def2 && "Must be able to find a definition of operand 2."); Ret.DefsSingleUse &= MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg()); Ret.DefsSingleUse &= MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg()); - assert(Def2 && "Must be able to find a definition of operand 2."); Ret.TrueDefs = std::make_pair(Def1, Def2); } else { Ret.TrueDefs = std::make_pair(Def1, nullptr); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 9ec26a19bdaa..01b97ba6ab20 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -380,7 +380,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co // This is eiher: // 1) A fixed frame index object which we know are aligned so // as long as we have a valid DForm/DSForm/DQForm (non XForm) we don't - // need to consider the alignement here. + // need to consider the alignment here. // 2) A not fixed object but in that case we now know that the min required // alignment is no more than 1 based on the previous check. if (InstrInfo->isXFormMemOp(Opcode)) @@ -747,12 +747,18 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, Register SrcReg = MI.getOperand(0).getReg(); // Search up the BB to find the definition of the CR bit. 
- MachineBasicBlock::reverse_iterator Ins; + MachineBasicBlock::reverse_iterator Ins = MI; + MachineBasicBlock::reverse_iterator Rend = MBB.rend(); + ++Ins; unsigned CRBitSpillDistance = 0; - for (Ins = MI; Ins != MBB.rend(); Ins++) { + bool SeenUse = false; + for (; Ins != Rend; ++Ins) { // Definition found. if (Ins->modifiesRegister(SrcReg, TRI)) break; + // Use found. + if (Ins->readsRegister(SrcReg, TRI)) + SeenUse = true; // Unable to find CR bit definition within maximum search distance. if (CRBitSpillDistance == MaxCRBitSpillDist) { Ins = MI; @@ -767,17 +773,35 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, if (Ins == MBB.rend()) Ins = MI; + bool SpillsKnownBit = false; // There is no need to extract the CR bit if its value is already known. switch (Ins->getOpcode()) { case PPC::CRUNSET: BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg) .addImm(0); + SpillsKnownBit = true; break; case PPC::CRSET: BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg) .addImm(-32768); + SpillsKnownBit = true; break; default: + // On Power9, we can use SETB to extract the LT bit. This only works for + // the LT bit since SETB produces -1/1/0 for LT/GT/. So the value + // of the bit we care about (32-bit sign bit) will be set to the value of + // the LT bit (regardless of the other bits in the CR field). + if (Subtarget.isISA3_0()) { + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR1LT || + SrcReg == PPC::CR2LT || SrcReg == PPC::CR3LT || + SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT || + SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) { + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg) + .addReg(getCRFromCRBit(SrcReg), RegState::Undef); + break; + } + } + // We need to move the CR field that contains the CR bit we are spilling. // The super register may not be explicitly defined (i.e. it can be defined // by a CR-logical that only defines the subreg) so we state that the CR @@ -803,8 +827,13 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, .addReg(Reg, RegState::Kill), FrameIndex); + bool KillsCRBit = MI.killsRegister(SrcReg, TRI); // Discard the pseudo instruction. MBB.erase(II); + if (SpillsKnownBit && KillsCRBit && !SeenUse) { + Ins->setDesc(TII.get(PPC::UNENCODED_NOP)); + Ins->RemoveOperand(0); + } } void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index a50e05920cd4..a5fbb0c6ec64 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -61,6 +61,15 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { public: PPCRegisterInfo(const PPCTargetMachine &TM); + /// getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode + /// for a given imm form load/store opcode \p ImmFormOpcode. + /// FIXME: move this to PPCInstrInfo class. + unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const { + if (!ImmToIdxMap.count(ImmOpcode)) + return PPC::INSTRUCTION_LIST_END; + return ImmToIdxMap.find(ImmOpcode)->second; + } + /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. 
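getMappedIdxOpcForImmOpc() above is a thin wrapper over the ImmToIdxMap lookup. A minimal model with invented opcode values (the real map relates D-form/DS-form load/store opcodes to their X-form, register+register counterparts):

#include <cassert>
#include <map>

enum Opc : unsigned { LD, LDX, STD_, STDX, INSTRUCTION_LIST_END };

unsigned mappedIdxOpc(const std::map<unsigned, unsigned> &ImmToIdxMap,
                      unsigned ImmOpcode) {
  // A single find() gives the same result as the count()-then-find() pair.
  auto It = ImmToIdxMap.find(ImmOpcode);
  return It == ImmToIdxMap.end() ? INSTRUCTION_LIST_END : It->second;
}

int main() {
  std::map<unsigned, unsigned> ImmToIdxMap = {{LD, LDX}, {STD_, STDX}};
  assert(mappedIdxOpc(ImmToIdxMap, LD) == LDX);
  assert(mappedIdxOpc(ImmToIdxMap, 999) == INSTRUCTION_LIST_END);
  return 0;
}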
const TargetRegisterClass * diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 10568ed4b655..0997f68bd999 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -61,7 +61,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, void PPCSubtarget::initializeEnvironment() { StackAlignment = Align(16); - DarwinDirective = PPC::DIR_NONE; + CPUDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; @@ -100,6 +100,7 @@ void PPCSubtarget::initializeEnvironment() { IsPPC6xx = false; IsE500 = false; FeatureMFTB = false; + AllowsUnalignedFPAccess = false; DeprecatedDST = false; HasLazyResolverStubs = false; HasICBT = false; @@ -126,6 +127,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // If cross-compiling with -march=ppc64le without -mcpu if (TargetTriple.getArch() == Triple::ppc64le) CPUName = "ppc64le"; + else if (TargetTriple.getSubArch() == Triple::PPCSubArch_spe) + CPUName = "e500"; else CPUName = "generic"; } @@ -190,7 +193,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { bool PPCSubtarget::enableMachineScheduler() const { return true; } bool PPCSubtarget::enableMachinePipeliner() const { - return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner; + return (CPUDirective == PPC::DIR_PWR9) && EnableMachinePipeliner; } bool PPCSubtarget::useDFAforSMS() const { return false; } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index d96c2893aee9..044e982740e9 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -57,6 +57,7 @@ namespace PPC { DIR_PWR7, DIR_PWR8, DIR_PWR9, + DIR_PWR_FUTURE, DIR_64 }; } @@ -84,7 +85,7 @@ protected: InstrItineraryData InstrItins; /// Which cpu directive was used. - unsigned DarwinDirective; + unsigned CPUDirective; /// Used by the ISel to turn in optimizations for POWER4-derived architectures bool HasMFOCRF; @@ -123,6 +124,7 @@ protected: bool IsPPC4xx; bool IsPPC6xx; bool FeatureMFTB; + bool AllowsUnalignedFPAccess; bool DeprecatedDST; bool HasLazyResolverStubs; bool IsLittleEndian; @@ -169,8 +171,11 @@ public: Align getStackAlignment() const { return StackAlignment; } /// getDarwinDirective - Returns the -m directive specified for the cpu. + unsigned getDarwinDirective() const { return CPUDirective; } + + /// getCPUDirective - Returns the -m directive specified for the cpu. /// - unsigned getDarwinDirective() const { return DarwinDirective; } + unsigned getCPUDirective() const { return CPUDirective; } /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. @@ -270,6 +275,7 @@ public: bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } + bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; } bool isDeprecatedDST() const { return DeprecatedDST; } bool hasICBT() const { return HasICBT; } bool hasInvariantFunctionDescriptors() const { @@ -347,6 +353,41 @@ public: /// True if the GV will be accessed via an indirect symbol. bool isGVIndirectSymbol(const GlobalValue *GV) const; + /// True if the ABI is descriptor based. + bool usesFunctionDescriptors() const { + // Both 32-bit and 64-bit AIX are descriptor based. For ELF only the 64-bit + // v1 ABI uses descriptors. 
+ return isAIXABI() || (is64BitELFABI() && !isELFv2ABI()); + } + + unsigned descriptorTOCAnchorOffset() const { + assert(usesFunctionDescriptors() && + "Should only be called when the target uses descriptors."); + return IsPPC64 ? 8 : 4; + } + + unsigned descriptorEnvironmentPointerOffset() const { + assert(usesFunctionDescriptors() && + "Should only be called when the target uses descriptors."); + return IsPPC64 ? 16 : 8; + } + + MCRegister getEnvironmentPointerRegister() const { + assert(usesFunctionDescriptors() && + "Should only be called when the target uses descriptors."); + return IsPPC64 ? PPC::X11 : PPC::R11; + } + + MCRegister getTOCPointerRegister() const { + assert((is64BitELFABI() || isAIXABI()) && + "Should only be called when the target is a TOC based ABI."); + return IsPPC64 ? PPC::X2 : PPC::R2; + } + + MCRegister getStackPointerRegister() const { + return IsPPC64 ? PPC::X1 : PPC::R1; + } + bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; } // End llvm namespace diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 8f313d9d01c4..17e1196eea59 100644 --- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index abefee8b339d..2caf4c99a1f8 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -51,8 +51,8 @@ opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); static cl:: -opt DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, - cl::desc("Disable PPC loop preinc prep")); +opt DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden, + cl::desc("Disable PPC loop instr form prep")); static cl::opt VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", @@ -77,7 +77,7 @@ EnableGEPOpt("ppc-gep-opt", cl::Hidden, static cl::opt EnablePrefetch("enable-ppc-prefetching", - cl::desc("disable software prefetching on PPC"), + cl::desc("enable software prefetching on PPC"), cl::init(false), cl::Hidden); static cl::opt @@ -94,7 +94,7 @@ static cl::opt ReduceCRLogical("ppc-reduce-cr-logicals", cl::desc("Expand eligible cr-logical binary ops to branches"), cl::init(true), cl::Hidden); -extern "C" void LLVMInitializePowerPCTarget() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(getThePPC32Target()); RegisterTargetMachine B(getThePPC64Target()); @@ -104,7 +104,7 @@ extern "C" void LLVMInitializePowerPCTarget() { #ifndef NDEBUG initializePPCCTRLoopsVerifyPass(PR); #endif - initializePPCLoopPreIncPrepPass(PR); + initializePPCLoopInstrFormPrepPass(PR); initializePPCTOCRegDepsPass(PR); initializePPCEarlyReturnPass(PR); initializePPCVSXCopyPass(PR); @@ -119,6 +119,7 @@ extern "C" void LLVMInitializePowerPCTarget() { initializePPCPreEmitPeepholePass(PR); initializePPCTLSDynamicCallPass(PR); initializePPCMIPeepholePass(PR); + initializePPCLowerMASSVEntriesPass(PR); } /// Return the datalayout string of a subtarget. 
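The descriptor offsets in the subtarget helpers above (8 and 16 on 64-bit, 4 and 8 on 32-bit) follow directly from a three-pointer layout. A sketch, assuming the conventional entry/TOC/environment ordering:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Each field is pointer-sized; the offsets match descriptorTOCAnchorOffset
// and descriptorEnvironmentPointerOffset in this hunk.
template <typename Ptr> struct FunctionDescriptor {
  Ptr EntryPoint;
  Ptr TOCAnchor;
  Ptr Environment;
};

int main() {
  assert(offsetof(FunctionDescriptor<uint64_t>, TOCAnchor) == 8);
  assert(offsetof(FunctionDescriptor<uint64_t>, Environment) == 16);
  assert(offsetof(FunctionDescriptor<uint32_t>, TOCAnchor) == 4);
  assert(offsetof(FunctionDescriptor<uint32_t>, Environment) == 8);
  return 0;
}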
@@ -214,8 +215,6 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, case Triple::ppc64le: return PPCTargetMachine::PPC_ABI_ELFv2; case Triple::ppc64: - if (TT.getEnvironment() == llvm::Triple::ELFv2) - return PPCTargetMachine::PPC_ABI_ELFv2; return PPCTargetMachine::PPC_ABI_ELFv1; default: return PPCTargetMachine::PPC_ABI_UNKNOWN; @@ -401,6 +400,9 @@ void PPCPassConfig::addIRPasses() { addPass(createPPCBoolRetToIntPass()); addPass(createAtomicExpandPass()); + // Lower generic MASSV routines to PowerPC subtarget-specific entries. + addPass(createPPCLowerMASSVEntriesPass()); + // For the BG/Q (or if explicitly requested), add explicit data prefetch // intrinsics. bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ && @@ -427,8 +429,8 @@ void PPCPassConfig::addIRPasses() { } bool PPCPassConfig::addPreISel() { - if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None) - addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); + if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None) + addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine())); if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) addPass(createHardwareLoopsPass()); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f51300c656aa..e05699cc95ec 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -84,10 +84,10 @@ int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 4 * TTI::TCC_Basic; } -int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) { +int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { if (DisablePPCConstHoist) - return BaseT::getIntImmCost(IID, Idx, Imm, Ty); + return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty); assert(Ty->isIntegerTy()); @@ -118,10 +118,10 @@ int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, return PPCTTIImpl::getIntImmCost(Imm, Ty); } -int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { +int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { if (DisablePPCConstHoist) - return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty); + return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty); assert(Ty->isIntegerTy()); @@ -283,24 +283,6 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, case Intrinsic::loop_decrement: return true; -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - - case Intrinsic::setjmp: - -#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) - // let's return it to _setjmp state -# pragma pop_macro("setjmp") -# undef setjmp_undefined_for_msvc -#endif - - case Intrinsic::longjmp: - // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp // because, although it does clobber the counter register, the // control can't then return to inside the loop unless there is also @@ -331,8 +313,12 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::lrint: Opcode = ISD::LRINT; break; + case Intrinsic::llrint: Opcode = ISD::LLRINT; break; case 
Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::lround: Opcode = ISD::LROUND; break; + case Intrinsic::llround: Opcode = ISD::LLROUND; break; case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; @@ -550,7 +536,7 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { - if (ST->getDarwinDirective() == PPC::DIR_A2) { + if (ST->getCPUDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling // helps expose latency-hiding opportunities to the instruction scheduler. UP.Partial = UP.Runtime = true; @@ -576,7 +562,7 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { // on combining the loads generated for consecutive accesses, and failure to // do so is particularly expensive. This makes it much more likely (compared // to only using concatenation unrolling). - if (ST->getDarwinDirective() == PPC::DIR_A2) + if (ST->getCPUDirective() == PPC::DIR_A2) return true; return LoopHasReductions; @@ -598,8 +584,8 @@ unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC || ClassID == VSXRC); if (ST->hasVSX()) { - assert(ClassID == GPRRC || ClassID == VSXRC); - return ClassID == GPRRC ? 32 : 64; + assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC); + return ClassID == VSXRC ? 64 : 32; } assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC); return 32; @@ -608,8 +594,14 @@ unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const { if (Vector) return ST->hasVSX() ? VSXRC : VRRC; - else if (Ty && Ty->getScalarType()->isFloatTy()) + else if (Ty && (Ty->getScalarType()->isFloatTy() || + Ty->getScalarType()->isDoubleTy())) return ST->hasVSX() ? VSXRC : FPRRC; + else if (Ty && (Ty->getScalarType()->isFP128Ty() || + Ty->getScalarType()->isPPC_FP128Ty())) + return VRRC; + else if (Ty && Ty->getScalarType()->isHalfTy()) + return VSXRC; else return GPRRC; } @@ -646,9 +638,10 @@ unsigned PPCTTIImpl::getCacheLineSize() const { return CacheLineSize; // On P7, P8 or P9 we have a cache line size of 128. - unsigned Directive = ST->getDarwinDirective(); + unsigned Directive = ST->getCPUDirective(); + // Assume that Future CPU has the same cache line size as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) return 128; // On other processors return a default of 64 bytes. @@ -662,7 +655,7 @@ unsigned PPCTTIImpl::getPrefetchDistance() const { } unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { - unsigned Directive = ST->getDarwinDirective(); + unsigned Directive = ST->getCPUDirective(); // The 440 has no SIMD support, but floating-point instructions // have a 5-cycle latency, so unroll by 5x for latency hiding. if (Directive == PPC::DIR_440) @@ -680,8 +673,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. 
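The unroll factors chosen by getMaxInterleaveFactor() condense to a directive switch, with DIR_PWR_FUTURE deliberately inheriting the POWER7/8/9 value. A simplified model (enum ordering and the helper itself are invented; the factors mirror the surrounding hunk):

enum Directive { DIR_440, DIR_A2, DIR_PWR7, DIR_PWR8, DIR_PWR9,
                 DIR_PWR_FUTURE, DIR_OTHER };

unsigned maxInterleaveFactor(Directive D) {
  switch (D) {
  case DIR_440: return 5;           // 5-cycle FP latency, no SIMD
  case DIR_A2:  return 6;           // in-order, deep pipeline
  case DIR_PWR7:
  case DIR_PWR8:
  case DIR_PWR9:
  case DIR_PWR_FUTURE: return 12;   // 6-cycle FP latency x two units
  default: return 2;                // two execution units on most cores
  }
}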
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + // Assume that future is the same as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) return 12; // For most things, modern systems have two execution units (and @@ -716,10 +710,13 @@ int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, return Cost * 2; } -int PPCTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, - TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, - TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args) { +int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Op1Info, + TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, + ArrayRef Args, + const Instruction *CxtI) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. @@ -829,8 +826,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return Cost; } -int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) { +int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, unsigned AddressSpace, + const Instruction *I) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && @@ -888,7 +886,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, // to be decomposed based on the alignment factor. // Add the cost of each scalar load or store. - Cost += LT.first*(SrcBytes/Alignment-1); + assert(Alignment); + Cost += LT.first * ((SrcBytes / Alignment->value()) - 1); // For a vector type, there is also scalarization overhead (only for // stores, loads are expanded using the vector-load + permutation sequence, @@ -919,7 +918,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, std::pair LT = TLI->getTypeLegalizationCost(DL, VecTy); // Firstly, the cost of load/store operation. 
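The unaligned-access cost in getMemoryOpCost(), LT.first * (SrcBytes / Alignment - 1), charges one extra operation per aligned piece after the first. A worked check in plain arithmetic:

#include <cassert>

// BaseCost models LT.first; the access is decomposed into
// SrcBytes / AlignBytes pieces based on the alignment factor.
int unalignedMemOpCost(int BaseCost, int SrcBytes, int AlignBytes) {
  assert(AlignBytes > 0 && SrcBytes % AlignBytes == 0);
  return BaseCost + BaseCost * (SrcBytes / AlignBytes - 1);
}

int main() {
  assert(unalignedMemOpCost(1, 16, 4) == 4);   // 16-byte vector, 4-byte align
  assert(unalignedMemOpCost(1, 16, 16) == 1);  // fully aligned: no extra cost
  return 0;
}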
- int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace); + int Cost = + getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant @@ -931,6 +931,20 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, return Cost; } +unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args, FastMathFlags FMF, unsigned VF) { + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); +} + +unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) { + if (ID == Intrinsic::bswap && ST->hasP9Vector()) + return TLI->getTypeLegalizationCost(DL, RetTy).first; + return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, + ScalarizationCostPassed); +} + bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) { diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 83a70364bf68..35388d14f606 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -46,9 +46,10 @@ public: using BaseT::getIntImmCost; int getIntImmCost(const APInt &Imm, Type *Ty); - int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); - int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); unsigned getUserCost(const User *U, ArrayRef Operands); @@ -90,14 +91,15 @@ public: TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, - ArrayRef Args = ArrayRef()); + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, @@ -106,6 +108,11 @@ public: unsigned AddressSpace, bool UseMaskForCond = false, bool UseMaskForGaps = false); + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args, FastMathFlags FMF, unsigned VF); + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX); /// @} }; diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 5e150be544ed..3e6d1c7939f1 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" +#include 
"llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index 99b5dec74668..649bd648a6cf 100644 --- a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -23,7 +23,7 @@ Target &llvm::getThePPC64LETarget() { return ThePPC64LETarget; } -extern "C" void LLVMInitializePowerPCTargetInfo() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTargetInfo() { RegisterTarget X(getThePPC32Target(), "ppc32", "PowerPC 32", "PPC"); -- cgit v1.2.3