34 files changed, 651 insertions, 298 deletions
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 9ec6b0a2416e..4e5feae99931 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -197,12 +197,6 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``.
 **CMAKE_CXX_FLAGS**:STRING
   Extra flags to use when compiling C++ source files.
 
-**BUILD_SHARED_LIBS**:BOOL
-  Flag indicating if shared libraries will be built. Its default value is
-  OFF. This option is only recommended for use by LLVM developers.
-  On Windows, shared libraries may be used when building with MinGW, including
-  mingw-w64, but not when building with the Microsoft toolchain.
-
 .. _LLVM-specific variables:
 
 LLVM-specific variables
@@ -445,6 +439,30 @@ LLVM-specific variables
   $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can
   be used to override the default system tools. 
 
+**LLVM_BUILD_LLVM_DYLIB**:BOOL
+  If enabled, the target for building the libLLVM shared library is added.
+  This library contains all of LLVM's components in a single shared library.
+  Defaults to OFF. This cannot be used in conjunction with BUILD_SHARED_LIBS.
+  Tools will only be linked to the libLLVM shared library if LLVM_LINK_LLVM_DYLIB
+  is also ON.
+  The components in the library can be customised by setting LLVM_DYLIB_COMPONENTS
+  to a list of the desired components.
+
+**LLVM_LINK_LLVM_DYLIB**:BOOL
+  If enabled, tools will be linked with the libLLVM shared library. Defaults
+  to OFF. Setting LLVM_LINK_LLVM_DYLIB to ON also sets LLVM_BUILD_LLVM_DYLIB
+  to ON.
+
+**BUILD_SHARED_LIBS**:BOOL
+  Flag indicating if each LLVM component (e.g. Support) is built as a shared
+  library (ON) or as a static library (OFF). Its default value is OFF. On
+  Windows, shared libraries may be used when building with MinGW, including
+  mingw-w64, but not when building with the Microsoft toolchain.
+ 
+  .. note:: BUILD_SHARED_LIBS is only recommended for use by LLVM developers.
+            If you want to build LLVM as a shared library, you should use the
+            ``LLVM_BUILD_LLVM_DYLIB`` option.
+
 Executing the test suite
 ========================
 
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 7b284d59656b..73831f545390 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -89,6 +89,30 @@ Non-comprehensive list of changes in this release
   the node ``N`` is guaranteed not to be the last in the list, it is safe to
   call ``&*++N->getIterator()`` directly.
 
+* The `Kaleidoscope tutorials <tutorial/index.html>`_ have been updated to use
+  the ORC JIT APIs.
+
+* ORC now has a basic set of C bindings.
+
+* Optional support for linking clang and the LLVM tools with a single libLLVM
+  shared library. To enable this, pass ``-DLLVM_LINK_LLVM_DYLIB=ON`` to CMake.
+  See `Building LLVM with CMake`_ for more details.
+
+* The optimization to move the prologue and epilogue of functions in colder
+  code path (shrink-wrapping) is now enabled by default.
+
+* A new target-independent gcc-compatible emulated Thread Local Storage mode
+  is added.  When ``-femultated-tls`` flag is used, all accesses to TLS
+  variables are converted to calls to ``__emutls_get_address`` in the runtime
+  library.
+
+* MSVC compatible exception handling has been completely overhauled. New
+  instructions have been introduced to facilitate this:
+  `New exception handling instructions <ExceptionHandling.html#new-exception-handling-instructions>`_. 
+  While we have done our best to test this feature thoroughly, it would
+  not be completely surprising if there were a few lingering issues that
+  early adopters might bump into.
+
 .. NOTE
    For small 1-3 sentence descriptions, just add an entry at the end of
    this list. If your description won't fit comfortably in one bullet
@@ -115,7 +139,7 @@ Changes to the ARM Backends
 During this release the AArch64 target has:
 
 * Added support for more sanitizers (MSAN, TSAN) and made them compatible with
-  all VMA kernel configurations (kurrently tested on 39 and 42 bits).
+  all VMA kernel configurations (currently tested on 39 and 42 bits).
 * Gained initial LLD support in the new ELF back-end
 * Extended the Load/Store optimiser and cleaned up some of the bad decisions
   made earlier.
@@ -218,9 +242,16 @@ Changes to the X86 Target
 * More efficient code for wide integer compares. (E.g. 64-bit compares
   on 32-bit targets.)
 
-* Tail call support for ``thiscall``, ``stdcall`, ``vectorcall``, and
+* Tail call support for ``thiscall``, ``stdcall``, ``vectorcall``, and
   ``fastcall`` functions.
 
+Changes to the Hexagon Target
+-----------------------------
+
+In addition to general code size and performance improvements, Hexagon target
+now has basic support for Hexagon V60 architecture and Hexagon Vector
+Extensions (HVX).
+
 Changes to the AVR Target
 -------------------------
 
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index edade3164a3c..f1ea2c03f13c 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -544,6 +544,11 @@ namespace llvm {
       return true;
     }
 
+    // Returns true if any segment in the live range contains any of the
+    // provided slot indexes.  Slots which occur in holes between
+    // segments will not cause the function to return true.
+    bool isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const;
+
     bool operator<(const LiveRange& other) const {
       const SlotIndex &thisIndex = beginIndex();
       const SlotIndex &otherIndex = other.beginIndex();
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index 1b75c60631b0..2f8c3c499295 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -1539,16 +1539,7 @@ public:
   }
 
   CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
-                       ArrayRef<OperandBundleDef> OpBundles = None,
                        const Twine &Name = "", MDNode *FPMathTag = nullptr) {
-    CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
-    if (isa<FPMathOperator>(CI))
-      CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
-    return Insert(CI, Name);
-  }
-
-  CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
-                       const Twine &Name, MDNode *FPMathTag = nullptr) {
     PointerType *PTy = cast<PointerType>(Callee->getType());
     FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
     return CreateCall(FTy, Callee, Args, Name, FPMathTag);
@@ -1563,6 +1554,15 @@ public:
     return Insert(CI, Name);
   }
 
+  CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+                       ArrayRef<OperandBundleDef> OpBundles,
+                       const Twine &Name = "", MDNode *FPMathTag = nullptr) {
+    CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
+    if (isa<FPMathOperator>(CI))
+      CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
+    return Insert(CI, Name);
+  }
+
   CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
                        const Twine &Name = "", MDNode *FPMathTag = nullptr) {
     return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index aba48ca6fa9e..28e1fd90fdf6 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -2512,6 +2512,14 @@ public:
     return block_begin() + getNumOperands();
   }
 
+  iterator_range<block_iterator> blocks() {
+    return make_range(block_begin(), block_end());
+  }
+
+  iterator_range<const_block_iterator> blocks() const {
+    return make_range(block_begin(), block_end());
+  }
+
   op_range incoming_values() { return operands(); }
 
   const_op_range incoming_values() const { return operands(); }
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index abc655ac34ca..26aa46fb6c2a 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -213,8 +213,11 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
   if (!Spillable)
     return;
 
-  // Mark li as unspillable if all live ranges are tiny.
-  if (li.isZeroLength(LIS.getSlotIndexes())) {
+  // Mark li as unspillable if all live ranges are tiny and the interval
+  // is not live at any reg mask.  If the interval is live at a reg mask
+  // spilling may be required.
+  if (li.isZeroLength(LIS.getSlotIndexes()) &&
+      !li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
     li.markNotSpillable();
     return;
   }
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index bb3488348f24..50158006211d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -748,6 +748,40 @@ void LiveRange::flushSegmentSet() {
   verify();
 }
 
+bool LiveRange::isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const {
+  ArrayRef<SlotIndex>::iterator SlotI = Slots.begin();
+  ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+  // If there are no regmask slots, we have nothing to search.
+  if (SlotI == SlotE)
+    return false;
+
+  // Start our search at the first segment that ends after the first slot.
+  const_iterator SegmentI = find(*SlotI);
+  const_iterator SegmentE = end();
+
+  // If there are no segments that end after the first slot, we're done.
+  if (SegmentI == SegmentE)
+    return false;
+
+  // Look for each slot in the live range.
+  for ( ; SlotI != SlotE; ++SlotI) {
+    // Go to the next segment that ends after the current slot.
+    // The slot may be within a hole in the range.
+    SegmentI = advanceTo(SegmentI, *SlotI);
+    if (SegmentI == SegmentE)
+      return false;
+
+    // If this segment contains the slot, we're done.
+    if (SegmentI->contains(*SlotI))
+      return true;
+    // Otherwise, look for the next slot.
+  }
+
+  // We didn't find a segment containing any of the slots.
+  return false;
+}
+
 void LiveInterval::freeSubRange(SubRange *S) {
   S->~SubRange();
   // Memory was allocated with BumpPtr allocator and is not freed here.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5d572c4c2b02..f7836345f720 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1637,6 +1637,7 @@ struct FloatSignAsInt {
   MachinePointerInfo FloatPointerInfo;
   SDValue IntValue;
   APInt SignMask;
+  uint8_t SignBit;
 };
 }
 
@@ -1653,6 +1654,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
   if (TLI.isTypeLegal(IVT)) {
     State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
     State.SignMask = APInt::getSignBit(NumBits);
+    State.SignBit = NumBits - 1;
     return;
   }
 
@@ -1689,6 +1691,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
                                   IntPtr, State.IntPointerInfo, MVT::i8,
                                   false, false, false, 0);
   State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+  State.SignBit = 7;
 }
 
 /// Replace the integer value produced by getSignAsIntValue() with a new value
@@ -1731,15 +1734,38 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
     return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
   }
 
-  // Transform values to integer, copy the sign bit and transform back.
+  // Transform Mag value to integer, and clear the sign bit.
   FloatSignAsInt MagAsInt;
   getSignAsIntValue(MagAsInt, DL, Mag);
-  assert(SignAsInt.SignMask == MagAsInt.SignMask);
-  SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT);
-  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue,
+  EVT MagVT = MagAsInt.IntValue.getValueType();
+  SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT);
+  SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue,
                                     ClearSignMask);
-  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit);
 
+  // Get the signbit at the right position for MagAsInt.
+  int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
+  if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+    if (ShiftAmount > 0) {
+      SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
+      SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
+    } else if (ShiftAmount < 0) {
+      SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
+      SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
+    }
+    SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
+  } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+    SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+    if (ShiftAmount > 0) {
+      SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
+      SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
+    } else if (ShiftAmount < 0) {
+      SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
+      SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
+    }
+  }
+
+  // Store the part with the modified sign and convert back to float.
+  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
   return modifySignAsInt(MagAsInt, DL, CopiedSign);
 }
 
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 886c5f6070c1..14ec91159809 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -257,10 +257,14 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
         if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
           if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
             calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
-        if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
-          if (getCleanupRetUnwindDest(InnerCleanupPad) ==
-              CatchSwitch->getUnwindDest())
+        if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
+          BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
+          // If a nested cleanup pad reports a null unwind destination and the
+          // enclosing catch pad doesn't it must be post-dominated by an
+          // unreachable instruction.
+          if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
             calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+        }
       }
     }
     int CatchHigh = FuncInfo.getLastStateNumber();
@@ -360,10 +364,14 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
       if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
         if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
           calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
-      if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
-        if (getCleanupRetUnwindDest(InnerCleanupPad) ==
-            CatchSwitch->getUnwindDest())
+      if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
+        BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
+        // If a nested cleanup pad reports a null unwind destination and the
+        // enclosing catch pad doesn't it must be post-dominated by an
+        // unreachable instruction.
+        if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
           calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+      }
     }
   } else {
     auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
index 331d2141b0e2..65f2a2f51f9b 100644
--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -1,8 +1,17 @@
 include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
 
+if( HAVE_LIBDL )
+    set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS})
+endif()
+if( HAVE_LIBPTHREAD )
+    set(LLVM_INTEL_JIT_LIBS pthread ${LLVM_INTEL_JIT_LIBS})
+endif()
+
 add_llvm_library(LLVMIntelJITEvents
   IntelJITEventListener.cpp
   jitprofiling.c
 
-  LINK_LIBS pthread ${CMAKE_DL_LIBS}
+  LINK_LIBS ${LLVM_INTEL_JIT_LIBS}
 )
+
+add_dependencies(LLVMIntelJITEvents LLVMCodeGen)
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
index afea3ecccda4..8584500d64ad 100644
--- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -21,4 +21,4 @@
 type = OptionalLibrary
 name = IntelJITEvents
 parent = ExecutionEngine
-required_libraries = Core DebugInfoDWARF Support Object ExecutionEngine
+required_libraries = CodeGen Core DebugInfoDWARF Support Object ExecutionEngine
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 0e5d3ac50379..11afcf7f0adc 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -338,9 +338,9 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     // FIXME: Do we need to support these?
     .Cases("i786", "i886", "i986", Triple::x86)
     .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
-    .Case("powerpc", Triple::ppc)
-    .Cases("powerpc64", "ppu", Triple::ppc64)
-    .Case("powerpc64le", Triple::ppc64le)
+    .Cases("powerpc", "ppc32", Triple::ppc)
+    .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64)
+    .Cases("powerpc64le", "ppc64le", Triple::ppc64le)
     .Case("xscale", Triple::arm)
     .Case("xscaleeb", Triple::armeb)
     .Case("aarch64", Triple::aarch64)
@@ -359,7 +359,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("r600", Triple::r600)
     .Case("amdgcn", Triple::amdgcn)
     .Case("hexagon", Triple::hexagon)
-    .Case("s390x", Triple::systemz)
+    .Cases("s390x", "systemz", Triple::systemz)
     .Case("sparc", Triple::sparc)
     .Case("sparcel", Triple::sparcel)
     .Cases("sparcv9", "sparc64", Triple::sparcv9)
diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td
index d51e2ccc8a35..361d21440a97 100644
--- a/lib/Target/Sparc/SparcInstrAliases.td
+++ b/lib/Target/Sparc/SparcInstrAliases.td
@@ -267,9 +267,9 @@ defm : int_cond_alias<"neg",  0b0110>;
 defm : int_cond_alias<"vc",   0b1111>;
 defm : int_cond_alias<"vs",   0b0111>;
 
-defm : fp_cond_alias<"a",     0b0000>;
-defm : fp_cond_alias<"",      0b0000>; // same as a; gnu asm, not in manual
-defm : fp_cond_alias<"n",     0b1000>;
+defm : fp_cond_alias<"a",     0b1000>;
+defm : fp_cond_alias<"",      0b1000>; // same as a; gnu asm, not in manual
+defm : fp_cond_alias<"n",     0b0000>;
 defm : fp_cond_alias<"u",     0b0111>;
 defm : fp_cond_alias<"g",     0b0110>;
 defm : fp_cond_alias<"ug",    0b0101>;
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 133bd0e1772a..135c32bf8c3b 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -69,19 +69,15 @@ public:
 class X86AsmBackend : public MCAsmBackend {
   const StringRef CPU;
   bool HasNopl;
-  uint64_t MaxNopLength;
+  const uint64_t MaxNopLength;
 public:
-  X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
+  X86AsmBackend(const Target &T, StringRef CPU)
+      : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
     HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
               CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
               CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
               CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
               CPU != "c3" && CPU != "c3-2";
-    // Max length of true long nop instruction is 15 bytes.
-    // Max length of long nop replacement instruction is 7 bytes.
-    // Taking into account SilverMont architecture features max length of nops
-    // is reduced for it to achieve better performance.
-    MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
   }
 
   unsigned getNumFixupKinds() const override {
@@ -299,7 +295,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
 /// bytes.
 /// \return - true on success, false on failure
 bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-  static const uint8_t TrueNops[10][10] = {
+  static const uint8_t Nops[10][10] = {
     // nop
     {0x90},
     // xchg %ax,%ax
@@ -322,31 +318,17 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
     {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   };
 
-  // Alternative nop instructions for CPUs which don't support long nops.
-  static const uint8_t AltNops[7][10] = {
-      // nop
-      {0x90},
-      // xchg %ax,%ax
-      {0x66, 0x90},
-      // lea 0x0(%esi),%esi
-      {0x8d, 0x76, 0x00},
-      // lea 0x0(%esi),%esi
-      {0x8d, 0x74, 0x26, 0x00},
-      // nop + lea 0x0(%esi),%esi
-      {0x90, 0x8d, 0x74, 0x26, 0x00},
-      // lea 0x0(%esi),%esi
-      {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 },
-      // lea 0x0(%esi),%esi
-      {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
-  };
-
-  // Select the right NOP table.
-  // FIXME: Can we get if CPU supports long nops from the subtarget somehow?
-  const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
-  assert(HasNopl || MaxNopLength <= 7);
+  // This CPU doesn't support long nops. If needed add more.
+  // FIXME: Can we get this from the subtarget somehow?
+  // FIXME: We could generated something better than plain 0x90.
+  if (!HasNopl) {
+    for (uint64_t i = 0; i < Count; ++i)
+      OW->write8(0x90);
+    return true;
+  }
 
-  // Emit as many largest nops as needed, then emit a nop of the remaining
-  // length.
+  // 15 is the longest single nop instruction.  Emit as many 15-byte nops as
+  // needed, then emit a nop of the remaining length.
   do {
     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 7f8ce4768c00..f5ffe0cf7e88 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -192,10 +192,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
   return 0;
 }
 
-static bool isEAXLiveIn(MachineFunction &MF) {
-  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
-       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
-    unsigned Reg = II->first;
+static bool isEAXLiveIn(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
+    unsigned Reg = RegMask.PhysReg;
 
     if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
         Reg == X86::AH || Reg == X86::AL)
@@ -261,7 +260,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
       // load the offset into a register and do one sub/add
       unsigned Reg = 0;
 
-      if (isSub && !isEAXLiveIn(*MBB.getParent()))
+      if (isSub && !isEAXLiveIn(MBB))
         Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
       else
         Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
@@ -1133,8 +1132,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
     AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
   if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
-    // Check whether EAX is livein for this function.
-    bool isEAXAlive = isEAXLiveIn(MF);
+    // Check whether EAX is livein for this block.
+    bool isEAXAlive = isEAXLiveIn(MBB);
 
     if (isEAXAlive) {
       // Sanity check that EAX is not livein for this function.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 49be64883939..6f0199b015cd 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -5896,7 +5896,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
 
   def : Pat<(_.EltVT (OpNode (load addr:$src))),
             (!cast<Instruction>(NAME#SUFF#Zm)
-                (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>;
+                (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
 }
 
 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 3af4afb903fe..22a95fa03f7c 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -29,6 +29,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -186,32 +187,8 @@ bool PruneEH::SimplifyFunction(Function *F) {
   for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
     if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
       if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
-        SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
-        SmallVector<OperandBundleDef, 1> OpBundles;
-        II->getOperandBundlesAsDefs(OpBundles);
-
-        // Insert a call instruction before the invoke.
-        CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
-                                          "", II);
-        Call->takeName(II);
-        Call->setCallingConv(II->getCallingConv());
-        Call->setAttributes(II->getAttributes());
-        Call->setDebugLoc(II->getDebugLoc());
-
-        // Anything that used the value produced by the invoke instruction
-        // now uses the value produced by the call instruction.  Note that we
-        // do this even for void functions and calls with no uses so that the
-        // callgraph edge is updated.
-        II->replaceAllUsesWith(Call);
         BasicBlock *UnwindBlock = II->getUnwindDest();
-        UnwindBlock->removePredecessor(II->getParent());
-
-        // Insert a branch to the normal destination right before the
-        // invoke.
-        BranchInst::Create(II->getNormalDest(), II);
-
-        // Finally, delete the invoke instruction!
-        BB->getInstList().pop_back();
+        removeUnwindEdge(&*BB);
 
         // If the unwind block is now dead, nuke it.
         if (pred_empty(UnwindBlock))
@@ -251,23 +228,39 @@ void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
   assert(pred_empty(BB) && "BB is not dead!");
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
 
+  Instruction *TokenInst = nullptr;
+
   CallGraphNode *CGN = CG[BB->getParent()];
   for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
     --I;
-    if (CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (!isa<IntrinsicInst>(I))
-        CGN->removeCallEdgeFor(CI);
-    } else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
-      CGN->removeCallEdgeFor(II);
+
+    if (I->getType()->isTokenTy()) {
+      TokenInst = &*I;
+      break;
+    }
+
+    if (auto CS = CallSite (&*I)) {
+      const Function *Callee = CS.getCalledFunction();
+      if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
+        CGN->removeCallEdgeFor(CS);
+      else if (!Callee->isIntrinsic())
+        CGN->removeCallEdgeFor(CS);
+    }
+
     if (!I->use_empty())
       I->replaceAllUsesWith(UndefValue::get(I->getType()));
   }
 
-  // Get the list of successors of this block.
-  std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
+  if (TokenInst) {
+    if (!isa<TerminatorInst>(TokenInst))
+      changeToUnreachable(TokenInst->getNextNode(), /*UseLLVMTrap=*/false);
+  } else {
+    // Get the list of successors of this block.
+    std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
 
-  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
-    Succs[i]->removePredecessor(BB);
+    for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+      Succs[i]->removePredecessor(BB);
 
-  BB->eraseFromParent();
+    BB->eraseFromParent();
+  }
 }
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 2101225ed9f7..acfdec43d21a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4799,6 +4799,17 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
       DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
       return;
     }
+    // Bail out if we have a PHI on an EHPad that gets a value from a
+    // CatchSwitchInst.  Because the CatchSwitchInst cannot be split, there is
+    // no good place to stick any instructions.
+    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
+       auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
+       if (isa<FuncletPadInst>(FirstNonPHI) ||
+           isa<CatchSwitchInst>(FirstNonPHI))
+         for (BasicBlock *PredBB : PN->blocks())
+           if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
+             return;
+    }
   }
 
 #ifndef NDEBUG
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2c0d317d16bc..17c25dfffc10 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1409,14 +1409,15 @@ private:
 /// different operations.
 class LoopVectorizationCostModel {
 public:
-  LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
-                             LoopInfo *LI, LoopVectorizationLegality *Legal,
+  LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
+                             LoopVectorizationLegality *Legal,
                              const TargetTransformInfo &TTI,
                              const TargetLibraryInfo *TLI, DemandedBits *DB,
                              AssumptionCache *AC, const Function *F,
-                             const LoopVectorizeHints *Hints)
-      : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
-        AC(AC), TheFunction(F), Hints(Hints) {}
+                             const LoopVectorizeHints *Hints,
+                             SmallPtrSetImpl<const Value *> &ValuesToIgnore)
+      : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
+        TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
 
   /// Information about vectorization costs
   struct VectorizationFactor {
@@ -1464,9 +1465,6 @@ public:
   SmallVector<RegisterUsage, 8>
   calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
 
-  /// Collect values we want to ignore in the cost model.
-  void collectValuesToIgnore();
-
 private:
   /// Returns the expected execution cost. The unit of the cost does
   /// not matter because we use the 'cost' units to compare different
@@ -1498,8 +1496,8 @@ public:
 
   /// The loop that we evaluate.
   Loop *TheLoop;
-  /// Predicated scalar evolution analysis.
-  PredicatedScalarEvolution &PSE;
+  /// Scev analysis.
+  ScalarEvolution *SE;
   /// Loop Info analysis.
   LoopInfo *LI;
   /// Vectorization legality.
@@ -1508,17 +1506,13 @@ public:
   const TargetTransformInfo &TTI;
   /// Target Library Info.
   const TargetLibraryInfo *TLI;
-  /// Demanded bits analysis.
+  /// Demanded bits analysis
   DemandedBits *DB;
-  /// Assumption cache.
-  AssumptionCache *AC;
   const Function *TheFunction;
-  /// Loop Vectorize Hint.
+  // Loop Vectorize Hint.
   const LoopVectorizeHints *Hints;
-  /// Values to ignore in the cost model.
-  SmallPtrSet<const Value *, 16> ValuesToIgnore;
-  /// Values to ignore in the cost model when VF > 1.
-  SmallPtrSet<const Value *, 16> VecValuesToIgnore;
+  // Values to ignore in the cost model.
+  const SmallPtrSetImpl<const Value *> &ValuesToIgnore;
 };
 
 /// \brief This holds vectorization requirements that must be verified late in
@@ -1763,10 +1757,19 @@ struct LoopVectorize : public FunctionPass {
       return false;
     }
 
+    // Collect values we want to ignore in the cost model. This includes
+    // type-promoting instructions we identified during reduction detection.
+    SmallPtrSet<const Value *, 32> ValuesToIgnore;
+    CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore);
+    for (auto &Reduction : *LVL.getReductionVars()) {
+      RecurrenceDescriptor &RedDes = Reduction.second;
+      SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
+      ValuesToIgnore.insert(Casts.begin(), Casts.end());
+    }
+
     // Use the cost model.
-    LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
-                                  &Hints);
-    CM.collectValuesToIgnore();
+    LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC,
+                                  F, &Hints, ValuesToIgnore);
 
     // Check the function attributes to find out if this function should be
     // optimized for size.
@@ -4636,6 +4639,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
 
   // Holds all interleaved store groups temporarily.
   SmallSetVector<InterleaveGroup *, 4> StoreGroups;
+  // Holds all interleaved load groups temporarily.
+  SmallSetVector<InterleaveGroup *, 4> LoadGroups;
 
   // Search the load-load/write-write pair B-A in bottom-up order and try to
   // insert B into the interleave group of A according to 3 rules:
@@ -4663,6 +4668,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
 
     if (A->mayWriteToMemory())
       StoreGroups.insert(Group);
+    else
+      LoadGroups.insert(Group);
 
     for (auto II = std::next(I); II != E; ++II) {
       Instruction *B = II->first;
@@ -4710,6 +4717,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
   for (InterleaveGroup *Group : StoreGroups)
     if (Group->getNumMembers() != Group->getFactor())
       releaseGroup(Group);
+
+  // Remove interleaved load groups that don't have the first and last member.
+  // This guarantees that we won't do speculative out of bounds loads.
+  for (InterleaveGroup *Group : LoadGroups)
+    if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
+      releaseGroup(Group);
 }
 
 LoopVectorizationCostModel::VectorizationFactor
@@ -4734,7 +4747,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   }
 
   // Find the trip count.
-  unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop);
   DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
 
   MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
@@ -4936,7 +4949,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
     return 1;
 
   // Do not interleave loops with a relatively small trip count.
-  unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop);
   if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
     return 1;
 
@@ -5164,15 +5177,15 @@ LoopVectorizationCostModel::calculateRegisterUsage(
     // Ignore instructions that are never used within the loop.
     if (!Ends.count(I)) continue;
 
+    // Skip ignored values.
+    if (ValuesToIgnore.count(I))
+      continue;
+
     // Remove all of the instructions that end at this location.
     InstrList &List = TransposeEnds[i];
     for (unsigned int j = 0, e = List.size(); j < e; ++j)
       OpenIntervals.erase(List[j]);
 
-    // Skip ignored values.
-    if (ValuesToIgnore.count(I))
-      continue;
-
     // For each VF find the maximum usage of registers.
     for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
       if (VFs[j] == 1) {
@@ -5182,12 +5195,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(
 
       // Count the number of live intervals.
       unsigned RegUsage = 0;
-      for (auto Inst : OpenIntervals) {
-        // Skip ignored values for VF > 1.
-        if (VecValuesToIgnore.count(Inst))
-          continue;
+      for (auto Inst : OpenIntervals)
         RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
-      }
       MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
     }
 
@@ -5331,7 +5340,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   if (VF > 1 && MinBWs.count(I))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
   Type *VectorTy = ToVectorTy(RetTy, VF);
-  auto SE = PSE.getSE();
 
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
@@ -5633,79 +5641,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
   return false;
 }
 
-void LoopVectorizationCostModel::collectValuesToIgnore() {
-  // Ignore ephemeral values.
-  CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
-
-  // Ignore type-promoting instructions we identified during reduction
-  // detection.
-  for (auto &Reduction : *Legal->getReductionVars()) {
-    RecurrenceDescriptor &RedDes = Reduction.second;
-    SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
-    VecValuesToIgnore.insert(Casts.begin(), Casts.end());
-  }
-
-  // Ignore induction phis that are only used in either GetElementPtr or ICmp
-  // instruction to exit loop. Induction variables usually have large types and
-  // can have big impact when estimating register usage.
-  // This is for when VF > 1.
-  for (auto &Induction : *Legal->getInductionVars()) {
-    auto *PN = Induction.first;
-    auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
-
-    // Check that the PHI is only used by the induction increment (UpdateV) or
-    // by GEPs. Then check that UpdateV is only used by a compare instruction or
-    // the loop header PHI.
-    // FIXME: Need precise def-use analysis to determine if this instruction
-    // variable will be vectorized.
-    if (std::all_of(PN->user_begin(), PN->user_end(),
-                    [&](const User *U) -> bool {
-                      return U == UpdateV || isa<GetElementPtrInst>(U);
-                    }) &&
-        std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
-                    [&](const User *U) -> bool {
-                      return U == PN || isa<ICmpInst>(U);
-                    })) {
-      VecValuesToIgnore.insert(PN);
-      VecValuesToIgnore.insert(UpdateV);
-    }
-  }
-
-  // Ignore instructions that will not be vectorized.
-  // This is for when VF > 1.
-  for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
-       ++bb) {
-    for (auto &Inst : **bb) {
-      switch (Inst.getOpcode()) {
-      case Instruction::GetElementPtr: {
-        // Ignore GEP if its last operand is an induction variable so that it is
-        // a consecutive load/store and won't be vectorized as scatter/gather
-        // pattern.
-
-        GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
-        unsigned NumOperands = Gep->getNumOperands();
-        unsigned InductionOperand = getGEPInductionOperand(Gep);
-        bool GepToIgnore = true;
-
-        // Check that all of the gep indices are uniform except for the
-        // induction operand.
-        for (unsigned i = 0; i != NumOperands; ++i) {
-          if (i != InductionOperand &&
-              !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
-                                            TheLoop)) {
-            GepToIgnore = false;
-            break;
-          }
-        }
-
-        if (GepToIgnore)
-          VecValuesToIgnore.insert(&Inst);
-        break;
-      }
-      }
-    }
-  }
-}
 
 void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
                                              bool IfPredicateStore) {
diff --git a/test/CodeGen/AArch64/fcopysign.ll b/test/CodeGen/AArch64/fcopysign.ll
new file mode 100644
index 000000000000..6bda33cf76ef
--- /dev/null
+++ b/test/CodeGen/AArch64/fcopysign.ll
@@ -0,0 +1,23 @@
+; RUN: llc -o - %s | FileCheck %s
+; Check that selection dag legalization of fcopysign works in cases with
+; different modes for the arguments.
+target triple = "aarch64--"
+
+declare fp128 @llvm.copysign.f128(fp128, fp128)
+
+@val = global double zeroinitializer, align 8
+
+; CHECK-LABEL: copysign0
+; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val]
+; CHECK: and [[ANDREG:x[0-9]+]], [[REG]], #0x8000000000000000
+; CHECK: lsr x[[LSRREGNUM:[0-9]+]], [[ANDREG]], #56
+; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7
+; CHECK: strb w[[LSRREGNUM]],
+; CHECK: ldr q{{[0-9]+}},
+define fp128 @copysign0() {
+entry:
+  %v = load double, double* @val, align 8
+  %conv = fpext double %v to fp128
+  %call = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000007FFF000000000000, fp128 %conv) #2
+  ret fp128 %call
+}
diff --git a/test/CodeGen/WinEH/wineh-noret-cleanup.ll b/test/CodeGen/WinEH/wineh-noret-cleanup.ll
new file mode 100644
index 000000000000..7d4d833aa9ba
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-noret-cleanup.ll
@@ -0,0 +1,80 @@
+; RUN: sed -e s/.Cxx:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=CXX
+; RUN: sed -e s/.Seh:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=SEH
+
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @__C_specific_handler(...)
+declare void @dummy_filter()
+
+declare void @f(i32)
+
+;Cxx: define void @test() personality i32 (...)* @__CxxFrameHandler3 {
+;Seh: define void @test() personality i32 (...)* @__C_specific_handler {
+entry:
+  invoke void @f(i32 1)
+          to label %invoke.cont unwind label %catch.dispatch
+
+catch.dispatch:
+  %cs1 = catchswitch within none [label %catch.body] unwind label %catch.dispatch.2
+
+catch.body:
+;Cxx: %catch = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+;Seh: %catch = catchpad within %cs1 [void ()* @dummy_filter]
+  invoke void @f(i32 2) [ "funclet"(token %catch) ]
+          to label %unreachable unwind label %terminate
+
+terminate:
+  %cleanup = cleanuppad within %catch []
+  call void @f(i32 3) [ "funclet"(token %cleanup) ]
+  unreachable
+
+unreachable:
+  unreachable
+
+invoke.cont:
+  ret void
+
+catch.dispatch.2:
+  %cs2 = catchswitch within none [label %catch.body.2] unwind to caller
+
+catch.body.2:
+;Cxx: %catch2 = catchpad within %cs2 [i8* null, i32 u0x40, i8* null]
+;Seh: %catch2 = catchpad within %cs2 [void ()* @dummy_filter]
+  unreachable
+}
+
+; CXX-LABEL: test:
+; CXX-LABEL: $ip2state$test:
+; CXX-NEXT:   .long   .Lfunc_begin0@IMGREL
+; CXX-NEXT:   .long   -1
+; CXX-NEXT:   .long   .Ltmp0@IMGREL+1
+; CXX-NEXT:   .long   1
+; CXX-NEXT:   .long   .Ltmp1@IMGREL+1
+; CXX-NEXT:   .long   -1
+; CXX-NEXT:   .long   "?catch$3@?0?test@4HA"@IMGREL
+; CXX-NEXT:   .long   2
+; CXX-NEXT:   .long   .Ltmp2@IMGREL+1
+; CXX-NEXT:   .long   3
+; CXX-NEXT:   .long   .Ltmp3@IMGREL+1
+; CXX-NEXT:   .long   2
+; CXX-NEXT:   .long   "?catch$5@?0?test@4HA"@IMGREL
+; CXX-NEXT:   .long   4
+
+; SEH-LABEL: test:
+; SEH-LABEL: .Llsda_begin0:
+; SEH-NEXT:    .long   .Ltmp0@IMGREL+1
+; SEH-NEXT:    .long   .Ltmp1@IMGREL+1
+; SEH-NEXT:    .long   dummy_filter@IMGREL
+; SEH-NEXT:    .long   .LBB0_3@IMGREL
+; SEH-NEXT:    .long   .Ltmp0@IMGREL+1
+; SEH-NEXT:    .long   .Ltmp1@IMGREL+1
+; SEH-NEXT:    .long   dummy_filter@IMGREL
+; SEH-NEXT:    .long   .LBB0_5@IMGREL
+; SEH-NEXT:    .long   .Ltmp2@IMGREL+1
+; SEH-NEXT:    .long   .Ltmp3@IMGREL+1
+; SEH-NEXT:    .long   "?dtor$2@?0?test@4HA"@IMGREL
+; SEH-NEXT:    .long   0
+; SEH-NEXT:    .long   .Ltmp2@IMGREL+1
+; SEH-NEXT:    .long   .Ltmp3@IMGREL+1
+; SEH-NEXT:    .long   dummy_filter@IMGREL
+; SEH-NEXT:    .long   .LBB0_5@IMGREL
+; SEH-NEXT:  .Llsda_end0:
diff --git a/test/CodeGen/X86/pr26625.ll b/test/CodeGen/X86/pr26625.ll
new file mode 100644
index 000000000000..1b2e227bb59a
--- /dev/null
+++ b/test/CodeGen/X86/pr26625.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mcpu=i686 2>&1 | FileCheck %s
+; PR26625
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386"
+
+define float @x0(float %f) #0 {
+entry:
+  %call = tail call float @sqrtf(float %f) #1
+  ret float %call
+; CHECK-LABEL: x0:
+; CHECK: flds
+; CHECK-NEXT: fsqrt
+; CHECK-NOT: vsqrtss
+}
+
+declare float @sqrtf(float) #0
+
+attributes #0 = { nounwind optsize readnone }
+attributes #1 = { nounwind optsize readnone }
diff --git a/test/CodeGen/X86/regalloc-spill-at-ehpad.ll b/test/CodeGen/X86/regalloc-spill-at-ehpad.ll
new file mode 100644
index 000000000000..32cc9e781fb6
--- /dev/null
+++ b/test/CodeGen/X86/regalloc-spill-at-ehpad.ll
@@ -0,0 +1,75 @@
+; RUN: llc -regalloc=greedy -mtriple=x86_64-pc-windows-msvc  < %s -o - | FileCheck %s
+
+; This test checks for proper handling of a condition where the greedy register
+; allocator encounters a very short interval that contains no uses but does
+; contain an EH pad unwind edge, which requires spilling.  Previously the
+; register allocator marked a interval like this as unspillable, resulting in
+; a compilation failure.
+
+
+; The following checks that the value %p is reloaded within the catch handler.
+; CHECK-LABEL: "?catch$8@?0?test@4HA":
+; CHECK:           .seh_endprologue
+; CHECK:           movq    -16(%rbp), %rax
+; CHECK:           movb    $0, (%rax)
+
+define i32* @test(i32* %a) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+  %call = call i32 @f()
+  %p = bitcast i32* %a to i8*
+  br i1 undef, label %if.end, label %if.else
+
+if.else:                                          ; preds = %entry
+  br i1 undef, label %cond.false.i, label %if.else.else
+
+if.else.else:                                     ; preds = %if.else
+  br i1 undef, label %cond.true.i, label %cond.false.i
+
+cond.true.i:                                      ; preds = %if.else.else
+  br label %invoke.cont
+
+cond.false.i:                                     ; preds = %if.else.else, %if.else
+  %call.i = invoke i32 @f()
+          to label %invoke.cont unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %cond.false.i
+  %tmp0 = catchswitch within none [label %catch] unwind label %ehcleanup
+
+catch:                                            ; preds = %catch.dispatch
+  %tmp1 = catchpad within %tmp0 [i8* null, i32 64, i8* null]
+  %p.0 = getelementptr inbounds i8, i8* %p, i64 0
+  store i8 0, i8* %p.0, align 8
+  invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) [ "funclet"(token %tmp1) ]
+          to label %noexc unwind label %ehcleanup
+
+noexc:                                            ; preds = %catch
+  unreachable
+
+invoke.cont:                                      ; preds = %cond.false.i, %cond.true.i
+  %cond.i = phi i32 [ %call, %cond.true.i ], [ %call.i, %cond.false.i ]
+  %cmp = icmp eq i32 %cond.i, -1
+  %tmp3 = select i1 %cmp, i32 4, i32 0
+  br label %if.end
+
+if.end:                                           ; preds = %invoke.cont, %entry
+  %state.0 = phi i32 [ %tmp3, %invoke.cont ], [ 4, %entry ]
+  %p.1 = getelementptr inbounds i8, i8* %p, i64 0
+  invoke void @g(i8* %p.1, i32 %state.0)
+          to label %invoke.cont.1 unwind label %ehcleanup
+
+invoke.cont.1:                                    ; preds = %if.end
+  ret i32* %a
+
+ehcleanup:                                        ; preds = %if.end, %catch, %catch.dispatch
+  %tmp4 = cleanuppad within none []
+  cleanupret from %tmp4 unwind to caller
+}
+
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @f()
+declare void @g(i8*, i32)
diff --git a/test/CodeGen/X86/shrink-wrap-chkstk.ll b/test/CodeGen/X86/shrink-wrap-chkstk.ll
index c0b2b45e676f..aecae89aee56 100644
--- a/test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ b/test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
 
 ; chkstk cannot come before the usual prologue, since it adjusts ESP.
+; If chkstk is used in the prologue, we also have to be careful about preserving
+; EAX if it is used.
 
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
@@ -35,3 +37,36 @@ bb2:
 ; CHECK: retl
 
 declare void @inalloca_params(<{ %struct.S }>* inalloca)
+
+declare i32 @doSomething(i32, i32*)
+
+; In this test case, we force usage of EAX before the prologue, and have to
+; compensate before calling __chkstk. It would also be valid for us to avoid
+; shrink wrapping in this case.
+
+define x86_fastcallcc i32 @use_eax_before_prologue(i32 inreg %a, i32 inreg %b) {
+  %tmp = alloca i32, i32 1024, align 4
+  %tmp2 = icmp slt i32 %a, %b
+  br i1 %tmp2, label %true, label %false
+
+true:
+  store i32 %a, i32* %tmp, align 4
+  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+  br label %false
+
+false:
+  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+  ret i32 %tmp.0
+}
+
+; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue
+; CHECK: movl %ecx, %eax
+; CHECK: cmpl %edx, %eax
+; CHECK: jge LBB1_2
+; CHECK: pushl %eax
+; CHECK: movl $4100, %eax
+; CHECK: calll __chkstk
+; CHECK: movl 4100(%esp), %eax
+; CHECK: calll _doSomething
+; CHECK: LBB1_2:
+; CHECK: retl
diff --git a/test/MC/Sparc/sparc-ctrl-instructions.s b/test/MC/Sparc/sparc-ctrl-instructions.s
index bcb625b17e61..ccfa36a0b143 100644
--- a/test/MC/Sparc/sparc-ctrl-instructions.s
+++ b/test/MC/Sparc/sparc-ctrl-instructions.s
@@ -124,6 +124,18 @@
         ! CHECK-NEXT:         ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
         bvs .BB0
 
+        ! CHECK:             fba .BB0                        ! encoding: [0x11,0b10AAAAAA,A,A]
+        ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
+        fba .BB0
+
+        ! CHECK:             fba .BB0                        ! encoding: [0x11,0b10AAAAAA,A,A]
+        ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
+        fb .BB0
+
+        ! CHECK:             fbn .BB0                        ! encoding: [0x01,0b10AAAAAA,A,A]
+        ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
+        fbn .BB0
+
         ! CHECK:             fbu .BB0                        ! encoding: [0x0f,0b10AAAAAA,A,A]
         ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
         fbu .BB0
@@ -131,6 +143,7 @@
         ! CHECK:             fbg .BB0                        ! encoding: [0x0d,0b10AAAAAA,A,A]
         ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
         fbg .BB0
+
         ! CHECK:             fbug .BB0                       ! encoding: [0x0b,0b10AAAAAA,A,A]
         ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
         fbug .BB0
diff --git a/test/MC/Sparc/sparc64-ctrl-instructions.s b/test/MC/Sparc/sparc64-ctrl-instructions.s
index 65bca2990f15..0e7ea25cab94 100644
--- a/test/MC/Sparc/sparc64-ctrl-instructions.s
+++ b/test/MC/Sparc/sparc64-ctrl-instructions.s
@@ -770,6 +770,18 @@
         ! CHECK-NEXT:         ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
         bpos,a,pt %xcc, .BB0
 
+        ! CHECK:             fba %fcc0, .BB0                        ! encoding: [0x11,0b01001AAA,A,A]
+        ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
+        fba %fcc0, .BB0
+
+        ! CHECK:             fba %fcc0, .BB0                        ! encoding: [0x11,0b01001AAA,A,A]
+        ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
+        fb %fcc0, .BB0
+
+        ! CHECK:             fbn %fcc0, .BB0                        ! encoding: [0x01,0b01001AAA,A,A]
+        ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
+        fbn %fcc0, .BB0
+
         ! CHECK:             fbu %fcc0, .BB0                      ! encoding: [0x0f,0b01001AAA,A,A]
         ! CHECK-NEXT:                                        !   fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
         fbu %fcc0, .BB0
diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s
index feac4e4cf039..572487bfdaca 100644
--- a/test/MC/X86/x86_nop.s
+++ b/test/MC/X86/x86_nop.s
@@ -22,7 +22,13 @@ inc %eax
 inc %eax
 
 // CHECK: 0:	40                                           	incl	%eax
-// CHECK: 1:	8d b4 26 00 00 00 00                            leal (%esi), %esi
+// CHECK: 1:	90                                           	nop
+// CHECK: 2:	90                                           	nop
+// CHECK: 3:	90                                           	nop
+// CHECK: 4:	90                                           	nop
+// CHECK: 5:	90                                           	nop
+// CHECK: 6:	90                                           	nop
+// CHECK: 7:	90                                           	nop
 // CHECK: 8:	40                                           	incl	%eax
 
 
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 6d21f39d4fd6..cb364102c7f3 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -61,6 +61,15 @@ define void @test_simplify4(%FILE* %fp) {
 ; CHECK-IPRINTF-NEXT: ret void
 }
 
+define void @test_simplify5(%FILE* %fp) {
+; CHECK-LABEL: @test_simplify5(
+  %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt) [ "deopt"() ]
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp) [ "deopt"() ]
+  ret void
+; CHECK-NEXT: ret void
+}
+
 define void @test_no_simplify1(%FILE* %fp) {
 ; CHECK-IPRINTF-LABEL: @test_no_simplify1(
   %fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0
diff --git a/test/Transforms/LoopStrengthReduce/funclet.ll b/test/Transforms/LoopStrengthReduce/funclet.ll
index 5d20646141c4..1bee3706cafa 100644
--- a/test/Transforms/LoopStrengthReduce/funclet.ll
+++ b/test/Transforms/LoopStrengthReduce/funclet.ll
@@ -214,3 +214,32 @@ try.cont.7:                                       ; preds = %try.cont
 
 ; CHECK: catch.dispatch.2:
 ; CHECK: %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
+
+define i32 @test2() personality i32 (...)* @_except_handler3 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %phi = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  invoke void @reserve()
+          to label %for.inc unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %for.body
+  %tmp18 = catchswitch within none [label %catch.handler] unwind to caller
+
+catch.handler:                                    ; preds = %catch.dispatch
+  %phi.lcssa = phi i32 [ %phi, %catch.dispatch ]
+  %tmp19 = catchpad within %tmp18 [i8* null]
+  catchret from %tmp19 to label %done
+
+done:
+  ret i32 %phi.lcssa
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %phi, 1
+  br label %for.body
+}
+
+; CHECK-LABEL: define i32 @test2(
+; CHECK:      %phi.lcssa = phi i32 [ %phi, %catch.dispatch ]
+; CHECK-NEXT: catchpad within
diff --git a/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
index 0cb845520246..f6f2609e8fbe 100644
--- a/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
+++ b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
@@ -16,9 +16,15 @@ for.cond.cleanup:                                 ; preds = %for.body
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 1
+  %odd.idx = add nsw i64 %0, 1
+
   %arrayidx = getelementptr inbounds double, double* %b, i64 %0
+  %arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx
+
   %1 = load double, double* %arrayidx, align 8
-  %add = fadd double %1, 1.000000e+00
+  %2 = load double, double* %arrayidx.odd, align 8
+
+  %add = fadd double %1, %2
   %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
   store double %add, double* %arrayidx2, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/test/Transforms/LoopVectorize/X86/reg-usage.ll b/test/Transforms/LoopVectorize/X86/reg-usage.ll
deleted file mode 100644
index 47a6e1029eda..000000000000
--- a/test/Transforms/LoopVectorize/X86/reg-usage.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s
-; REQUIRES: asserts
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@a = global [1024 x i8] zeroinitializer, align 16
-@b = global [1024 x i8] zeroinitializer, align 16
-
-define i32 @foo() {
-; This function has a loop of SAD pattern. Here we check when VF = 16 the
-; register usage doesn't exceed 16.
-;
-; CHECK-LABEL: foo
-; CHECK:      LV(REG): VF = 4
-; CHECK-NEXT: LV(REG): Found max usage: 4
-; CHECK:      LV(REG): VF = 8
-; CHECK-NEXT: LV(REG): Found max usage: 7
-; CHECK:      LV(REG): VF = 16
-; CHECK-NEXT: LV(REG): Found max usage: 13
-
-entry:
-  br label %for.body
-
-for.cond.cleanup:
-  %add.lcssa = phi i32 [ %add, %for.body ]
-  ret i32 %add.lcssa
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
-  %conv3 = zext i8 %1 to i32
-  %sub = sub nsw i32 %conv, %conv3
-  %ispos = icmp sgt i32 %sub, -1
-  %neg = sub nsw i32 0, %sub
-  %2 = select i1 %ispos, i32 %sub, i32 %neg
-  %add = add nsw i32 %2, %s.015
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 1024
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
-
-define i64 @bar(i64* nocapture %a) {
-; CHECK-LABEL: bar
-; CHECK:       LV(REG): VF = 2
-; CHECK:       LV(REG): Found max usage: 4
-;
-entry:
-  br label %for.body
-
-for.cond.cleanup:
-  %add2.lcssa = phi i64 [ %add2, %for.body ]
-  ret i64 %add2.lcssa
-
-for.body:
-  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
-  %0 = load i64, i64* %arrayidx, align 8
-  %add = add nsw i64 %0, %i.012
-  store i64 %add, i64* %arrayidx, align 8
-  %add2 = add nsw i64 %add, %s.011
-  %inc = add nuw nsw i64 %i.012, 1
-  %exitcond = icmp eq i64 %inc, 1024
-  br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
diff --git a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
index fe9d59efc8b3..e6dc39c2afad 100644
--- a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
+++ b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; -vectorizer-maximize-bandwidth is indicated.
 ;
 ; CHECK-label: foo
-; CHECK: LV: Selecting VF: 32.
+; CHECK: LV: Selecting VF: 16.
 define void @foo() {
 entry:
   br label %for.body
diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll
index d7237a5c27dc..54ce3e29293a 100644
--- a/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -292,10 +292,8 @@ for.body:                                         ; preds = %for.body, %entry
 ; }
 
 ; CHECK-LABEL: @even_load(
-; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
-; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 
 define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
 entry:
diff --git a/test/Transforms/PruneEH/pr26263.ll b/test/Transforms/PruneEH/pr26263.ll
new file mode 100644
index 000000000000..17fafeb68505
--- /dev/null
+++ b/test/Transforms/PruneEH/pr26263.ll
@@ -0,0 +1,56 @@
+; RUN: opt -prune-eh -S < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc"
+
+declare void @neverthrows() nounwind
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+  invoke void @neverthrows()
+          to label %try.cont unwind label %cleanuppad
+
+try.cont:
+  ret void
+
+cleanuppad:
+  %cp = cleanuppad within none []
+  br label %cleanupret
+
+cleanupret:
+  cleanupret from %cp unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: call void @neverthrows()
+
+; CHECK: %[[cp:.*]] = cleanuppad within none []
+; CHECK-NEXT: unreachable
+
+; CHECK: cleanupret from %[[cp]] unwind to caller
+
+define void @test2() personality i32 (...)* @__CxxFrameHandler3 {
+  invoke void @neverthrows()
+          to label %try.cont unwind label %catchswitch
+
+try.cont:
+  ret void
+
+catchswitch:
+  %cs = catchswitch within none [label %catchpad] unwind to caller
+
+catchpad:
+  %cp = catchpad within %cs []
+  unreachable
+
+ret:
+  ret void
+}
+
+; CHECK-LABEL: define void @test2(
+; CHECK: call void @neverthrows()
+
+; CHECK: %[[cs:.*]] = catchswitch within none [label
+
+; CHECK: catchpad within %[[cs]] []
+; CHECK-NEXT: unreachable
+
+declare i32 @__CxxFrameHandler3(...)